From 5647b25c3335a25ba32d73e61850a374a708788a Mon Sep 17 00:00:00 2001 From: Jonathan Liu Date: Mon, 26 Sep 2016 20:21:45 +1000 Subject: drm/sun4i: rgb: Enable panel after controller The panel should be enabled after the controller so that we do not have visual glitches on the panel while the controller is setup. Similarly, the panel should be disabled before the controller. Signed-off-by: Jonathan Liu Reviewed-by: Sean Paul Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index c3ff10f559cc..4e4bea6f395c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -152,15 +152,16 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling RGB output\n"); - if (!IS_ERR(tcon->panel)) { + if (!IS_ERR(tcon->panel)) drm_panel_prepare(tcon->panel); - drm_panel_enable(tcon->panel); - } /* encoder->bridge can be NULL; drm_bridge_enable checks for it */ drm_bridge_enable(encoder->bridge); sun4i_tcon_channel_enable(tcon, 0); + + if (!IS_ERR(tcon->panel)) + drm_panel_enable(tcon->panel); } static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) @@ -171,15 +172,16 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Disabling RGB output\n"); + if (!IS_ERR(tcon->panel)) + drm_panel_disable(tcon->panel); + sun4i_tcon_channel_disable(tcon, 0); /* encoder->bridge can be NULL; drm_bridge_disable checks for it */ drm_bridge_disable(encoder->bridge); - if (!IS_ERR(tcon->panel)) { - drm_panel_disable(tcon->panel); + if (!IS_ERR(tcon->panel)) drm_panel_unprepare(tcon->panel); - } } static void sun4i_rgb_encoder_mode_set(struct drm_encoder *encoder, -- cgit v1.2.3-59-g8ed1b From 0df03b43035afd0a64916fe4e5bca978562ffa5a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 29 Sep 2016 14:05:05 +0200 Subject: drm/sun4i: rgb: Remove the bridge enable/disable functions The atomic helpers already call the drm_bridge_enable on our behalf, there's no need to do it a second time. Reported-by: Sean Paul Reviewed-by: Sean Paul Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_rgb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c index 4e4bea6f395c..d198ad7e5323 100644 --- a/drivers/gpu/drm/sun4i/sun4i_rgb.c +++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c @@ -155,9 +155,6 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder) if (!IS_ERR(tcon->panel)) drm_panel_prepare(tcon->panel); - /* encoder->bridge can be NULL; drm_bridge_enable checks for it */ - drm_bridge_enable(encoder->bridge); - sun4i_tcon_channel_enable(tcon, 0); if (!IS_ERR(tcon->panel)) @@ -177,9 +174,6 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder) sun4i_tcon_channel_disable(tcon, 0); - /* encoder->bridge can be NULL; drm_bridge_disable checks for it */ - drm_bridge_disable(encoder->bridge); - if (!IS_ERR(tcon->panel)) drm_panel_unprepare(tcon->panel); } -- cgit v1.2.3-59-g8ed1b From 0ce267ff95a0302cf6fb2a552833abbfb7861a43 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 18 Oct 2016 15:36:48 +0200 Subject: fuse: fix root dentry initialization Add missing dentry initialization to root dentry. Fixes: f75fdf22b0a8 ("fuse: don't use ->d_time") Reported-by: Andreas Reis Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 5 +++++ fs/fuse/fuse_i.h | 1 + fs/fuse/inode.c | 3 ++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6a4d0e5418a1..b3ebe512d64c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -286,6 +286,11 @@ const struct dentry_operations fuse_dentry_operations = { .d_release = fuse_dentry_release, }; +const struct dentry_operations fuse_root_dentry_operations = { + .d_init = fuse_dentry_init, + .d_release = fuse_dentry_release, +}; + int fuse_valid_type(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 0dfbb136e59a..91307940c8ac 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -692,6 +692,7 @@ static inline u64 get_node_id(struct inode *inode) extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; +extern const struct dentry_operations fuse_root_dentry_operations; /** * Inode to nodeid comparison. diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 17141099f2e7..6fe6a88ecb4a 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1131,10 +1131,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err = -ENOMEM; root = fuse_get_root_inode(sb, d.rootmode); + sb->s_d_op = &fuse_root_dentry_operations; root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; - /* only now - we want root dentry with NULL ->d_op */ + /* Root dentry doesn't have .d_revalidate */ sb->s_d_op = &fuse_dentry_operations; init_req = fuse_request_alloc(0); -- cgit v1.2.3-59-g8ed1b From f95df7d6cd92787d54c9ad3d4843f9bcd137f9db Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:16:35 +0000 Subject: dmaengine: edma: Fix error return code in edma_alloc_chan_resources() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun Signed-off-by: Vinod Koul --- drivers/dma/edma.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index e18a58068bca..77242b37ef87 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1628,6 +1628,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan) if (echan->slot[0] < 0) { dev_err(dev, "Entry slot allocation failed for channel %u\n", EDMA_CHAN_SLOT(echan->ch_num)); + ret = echan->slot[0]; goto err_slot; } -- cgit v1.2.3-59-g8ed1b From d6619761068cf573cae406f176d00b82a39a37fc Mon Sep 17 00:00:00 2001 From: Jérémy Lefaure Date: Thu, 6 Oct 2016 17:59:53 -0400 Subject: dmaengine: mmp_tdma: add missing select GENERIC_ALLOCATOR in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some compilation errors when CONFIG_MMP_TDMA is enabled and CONFIG_GENERIC_ALLOCATOR is disabled: drivers/built-in.o: In function `mmp_tdma_prep_dma_cyclic': mmp_tdma.c:(.text+0x7890e): undefined reference to `gen_pool_dma_alloc' drivers/built-in.o: In function `mmp_tdma_free_chan_resources': mmp_tdma.c:(.text+0x78aca): undefined reference to `gen_pool_free' drivers/built-in.o: In function `mmp_tdma_probe': mmp_tdma.c:(.text+0x78ea8): undefined reference to `of_gen_pool_get' This commit fix this problem by selecting GENERIC_ALLOCATOR when CONFIG_MMP_TDMA is enabled. Signed-off-by: Jérémy Lefaure Signed-off-by: Vinod Koul --- drivers/dma/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index af63a6bcf564..141aefbe37ec 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -306,6 +306,7 @@ config MMP_TDMA depends on ARCH_MMP || COMPILE_TEST select DMA_ENGINE select MMP_SRAM if ARCH_MMP + select GENERIC_ALLOCATOR help Support the MMP Two-Channel DMA engine. This engine used for MMP Audio DMA and pxa910 SQU. -- cgit v1.2.3-59-g8ed1b From 83ba62bc700bab710b22be3a1bf6cf973f754273 Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Tue, 18 Oct 2016 16:23:59 +0800 Subject: drm/mediatek: fix a typo of OD_CFG to OD_RELAYMODE If we want to set the hardware OD to relay mode, we have to set OD_CFG register rather than OD_RELAYMODE; otherwise, the system will access the wrong address. Fixes: 7216436420414144646f5d8343d061355fd23483 ("drm/mediatek: set mt8173 dithering function") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index df33b3ca6ffd..aa5f20fabd10 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int bpc) { writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE); + writel(OD_RELAYMODE, comp->regs + OD_CFG); mtk_dither_set(comp, bpc, DISP_OD_CFG); } -- cgit v1.2.3-59-g8ed1b From f752fff611b99f5679224f3990a1f531ea64b1ec Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 29 Sep 2016 11:29:48 +0800 Subject: drm/mediatek: set vblank_disable_allowed to true MTK DRM driver didn't set the vblank_disable_allowed to true, it cause that the irq_handler is called every 16.6 ms (every vblank) when the display didn't be updated. Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index cf83f6507ec8..0b2ae47eb52c 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -217,6 +217,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) if (ret < 0) goto err_component_unbind; + drm->vblank_disable_allowed = true; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); -- cgit v1.2.3-59-g8ed1b From 56e4b1e183555c74097fa012f1606b22223f027b Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Thu, 29 Sep 2016 11:29:49 +0800 Subject: drm/mediatek: clear IRQ status before enable OVL interrupt To make sure that the first vblank IRQ after enabling vblank isn't too short or immediate, we have to clear the IRQ status before enable OVL interrupt. Signed-off-by: Bibby Hsieh Acked-by: CK Hu --- drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 019b7ca392d7..f75c5b5a536c 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -80,6 +80,7 @@ static void mtk_ovl_enable_vblank(struct mtk_ddp_comp *comp, ddp_comp); priv->crtc = crtc; + writel(0x0, comp->regs + DISP_REG_OVL_INTSTA); writel_relaxed(OVL_FME_CPL_INT, comp->regs + DISP_REG_OVL_INTEN); } -- cgit v1.2.3-59-g8ed1b From d542b7c473f0eb34455974d66ea93653b3eb40ce Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:13 +0800 Subject: drm/mediatek: do mtk_hdmi_send_infoframe after HDMI clock enable The mtk_hdmi_send_infoframe have to be run after PLL and PIXEL clock of HDMI enable. Make sure that HDMI inforframes can be sent successfully. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- drivers/gpu/drm/mediatek/mtk_hdmi.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 71227deef21b..0e8c4d9af340 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1133,12 +1133,6 @@ static int mtk_hdmi_output_set_display_mode(struct mtk_hdmi *hdmi, phy_power_on(hdmi->phy); mtk_hdmi_aud_output_config(hdmi, mode); - mtk_hdmi_setup_audio_infoframe(hdmi); - mtk_hdmi_setup_avi_infoframe(hdmi, mode); - mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI"); - if (mode->flags & DRM_MODE_FLAG_3D_MASK) - mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode); - mtk_hdmi_hw_vid_black(hdmi, false); mtk_hdmi_hw_aud_unmute(hdmi); mtk_hdmi_hw_send_av_unmute(hdmi); @@ -1401,6 +1395,16 @@ static void mtk_hdmi_bridge_pre_enable(struct drm_bridge *bridge) hdmi->powered = true; } +static void mtk_hdmi_send_infoframe(struct mtk_hdmi *hdmi, + struct drm_display_mode *mode) +{ + mtk_hdmi_setup_audio_infoframe(hdmi); + mtk_hdmi_setup_avi_infoframe(hdmi, mode); + mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI"); + if (mode->flags & DRM_MODE_FLAG_3D_MASK) + mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode); +} + static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge) { struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge); @@ -1409,6 +1413,7 @@ static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge) clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PLL]); clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PIXEL]); phy_power_on(hdmi->phy); + mtk_hdmi_send_infoframe(hdmi, &hdmi->mode); hdmi->enabled = true; } -- cgit v1.2.3-59-g8ed1b From 968253bd7caae5621f6806dd5055353fe33d366e Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:14 +0800 Subject: drm/mediatek: enhance the HDMI driving current In order to improve 4K resolution performance, we have to enhance the HDMI driving current when clock rate is greater than 165MHz. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c | 42 ++++++++++++++++++-------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c index 8a24754b440f..51cb9cfb6646 100644 --- a/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c +++ b/drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c @@ -265,6 +265,9 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw); unsigned int pre_div; unsigned int div; + unsigned int pre_ibias; + unsigned int hdmi_ibias; + unsigned int imp_en; dev_dbg(hdmi_phy->dev, "%s: %lu Hz, parent: %lu Hz\n", __func__, rate, parent_rate); @@ -298,18 +301,31 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, (0x1 << PLL_BR_SHIFT), RG_HDMITX_PLL_BP | RG_HDMITX_PLL_BC | RG_HDMITX_PLL_BR); - mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, RG_HDMITX_PRD_IMP_EN); + if (rate < 165000000) { + mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, + RG_HDMITX_PRD_IMP_EN); + pre_ibias = 0x3; + imp_en = 0x0; + hdmi_ibias = hdmi_phy->ibias; + } else { + mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON3, + RG_HDMITX_PRD_IMP_EN); + pre_ibias = 0x6; + imp_en = 0xf; + hdmi_ibias = hdmi_phy->ibias_up; + } mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON4, - (0x3 << PRD_IBIAS_CLK_SHIFT) | - (0x3 << PRD_IBIAS_D2_SHIFT) | - (0x3 << PRD_IBIAS_D1_SHIFT) | - (0x3 << PRD_IBIAS_D0_SHIFT), + (pre_ibias << PRD_IBIAS_CLK_SHIFT) | + (pre_ibias << PRD_IBIAS_D2_SHIFT) | + (pre_ibias << PRD_IBIAS_D1_SHIFT) | + (pre_ibias << PRD_IBIAS_D0_SHIFT), RG_HDMITX_PRD_IBIAS_CLK | RG_HDMITX_PRD_IBIAS_D2 | RG_HDMITX_PRD_IBIAS_D1 | RG_HDMITX_PRD_IBIAS_D0); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON3, - (0x0 << DRV_IMP_EN_SHIFT), RG_HDMITX_DRV_IMP_EN); + (imp_en << DRV_IMP_EN_SHIFT), + RG_HDMITX_DRV_IMP_EN); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6, (hdmi_phy->drv_imp_clk << DRV_IMP_CLK_SHIFT) | (hdmi_phy->drv_imp_d2 << DRV_IMP_D2_SHIFT) | @@ -318,12 +334,14 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate, RG_HDMITX_DRV_IMP_CLK | RG_HDMITX_DRV_IMP_D2 | RG_HDMITX_DRV_IMP_D1 | RG_HDMITX_DRV_IMP_D0); mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON5, - (hdmi_phy->ibias << DRV_IBIAS_CLK_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D2_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D1_SHIFT) | - (hdmi_phy->ibias << DRV_IBIAS_D0_SHIFT), - RG_HDMITX_DRV_IBIAS_CLK | RG_HDMITX_DRV_IBIAS_D2 | - RG_HDMITX_DRV_IBIAS_D1 | RG_HDMITX_DRV_IBIAS_D0); + (hdmi_ibias << DRV_IBIAS_CLK_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D2_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D1_SHIFT) | + (hdmi_ibias << DRV_IBIAS_D0_SHIFT), + RG_HDMITX_DRV_IBIAS_CLK | + RG_HDMITX_DRV_IBIAS_D2 | + RG_HDMITX_DRV_IBIAS_D1 | + RG_HDMITX_DRV_IBIAS_D0); return 0; } -- cgit v1.2.3-59-g8ed1b From 0d2200794f0a2c1ebb3b6613842914d8ce4b67f9 Mon Sep 17 00:00:00 2001 From: Junzhi Zhao Date: Thu, 29 Sep 2016 11:02:15 +0800 Subject: drm/mediatek: modify the factor to make the pll_rate set in the 1G-2G range Currently, the code sets the "pll" to the desired multiple of the pixel clock manully(4*3m 8*3,etc). The valid range of the pll is 1G-2G, however, when the pixel clock is bigger than 167MHz, the "pll" will be set to a invalid value( > 2G), then the "pll" will be 2GHz, thus the pixel clock will be in correct. Change the factor to make the "pll" be set in the (1G, 2G) range. Signed-off-by: Junzhi Zhao Signed-off-by: Bibby Hsieh --- drivers/gpu/drm/mediatek/mtk_dpi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index 0186e500d2a5..90fb831ef031 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -432,11 +432,16 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, unsigned long pll_rate; unsigned int factor; + /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ pix_rate = 1000UL * mode->clock; - if (mode->clock <= 74000) + if (mode->clock <= 27000) + factor = 16 * 3; + else if (mode->clock <= 84000) factor = 8 * 3; - else + else if (mode->clock <= 167000) factor = 4 * 3; + else + factor = 2 * 3; pll_rate = pix_rate * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", -- cgit v1.2.3-59-g8ed1b From 989cea5c14be024e879c0055dc6d033680a52610 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 21 Oct 2016 01:13:33 +1100 Subject: kbuild: prevent lib-ksyms.o rebuilds Signed-off-by: Nicholas Piggin Reported-by: Russell King Tested-by: Russell King Signed-off-by: Michal Marek --- scripts/Makefile.build | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index de46ab03f063..e1f25d6d132e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -430,6 +430,9 @@ cmd_export_list = $(OBJDUMP) -h $< | \ $(obj)/lib-ksyms.o: $(lib-target) FORCE $(call if_changed,export_list) + +targets += $(obj)/lib-ksyms.o + endif # -- cgit v1.2.3-59-g8ed1b From b7f865ede20c87073216f77fe97f6fc56666e3da Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Tue, 25 Oct 2016 01:08:31 +0800 Subject: ARM: dts: sun8i: fix the pinmux for UART1 When the patch is applied, the allwinner,driver and allwinner,pull properties are removed. Although they're described to be optional in the devicetree binding, without them, the pinmux cannot be initialized, and the uart cannot be used. Add them back to fix the problem, and makes the bluetooth on iNet D978 Rev2 board work. Fixes: 82eec384249f (ARM: dts: sun8i: add pinmux for UART1 at PG) Signed-off-by: Icenowy Zheng Signed-off-by: Maxime Ripard --- arch/arm/boot/dts/sun8i-a23-a33.dtsi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi index 48fc24f36fcb..300a1bd5a6ec 100644 --- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi @@ -282,11 +282,15 @@ uart1_pins_a: uart1@0 { allwinner,pins = "PG6", "PG7"; allwinner,function = "uart1"; + allwinner,drive = ; + allwinner,pull = ; }; uart1_pins_cts_rts_a: uart1-cts-rts@0 { allwinner,pins = "PG8", "PG9"; allwinner,function = "uart1"; + allwinner,drive = ; + allwinner,pull = ; }; mmc0_pins_a: mmc0@0 { -- cgit v1.2.3-59-g8ed1b From 4efca4ed05cbdfd13ec3e8cb623fb77d6e4ab187 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 1 Nov 2016 12:46:19 +1100 Subject: kbuild: modversions for EXPORT_SYMBOL() for asm Allow architectures to create asm/asm-prototypes.h file that provides C prototypes for exported asm functions, which enables proper CRC versions to be generated for them. Signed-off-by: Nicholas Piggin Signed-off-by: Michal Marek --- scripts/Makefile.build | 78 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 72 insertions(+), 6 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index e1f25d6d132e..3e223c264469 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -159,7 +159,8 @@ cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(src)/%.c FORCE $(call if_changed_dep,cpp_i_c) -cmd_gensymtypes = \ +# These mirror gensymtypes_S and co below, keep them in synch. +cmd_gensymtypes_c = \ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ @@ -169,7 +170,7 @@ cmd_gensymtypes = \ quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ cmd_cc_symtypes_c = \ set -e; \ - $(call cmd_gensymtypes,true,$@) >/dev/null; \ + $(call cmd_gensymtypes_c,true,$@) >/dev/null; \ test -s $@ || rm -f $@ $(obj)/%.symtypes : $(src)/%.c FORCE @@ -198,9 +199,10 @@ else # the actual value of the checksum generated by genksyms cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< -cmd_modversions = \ + +cmd_modversions_c = \ if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ - $(call cmd_gensymtypes,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/.tmp_$(@F:.o=.ver); \ \ $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ @@ -268,13 +270,14 @@ endif # CONFIG_STACK_VALIDATION define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ $(call cmd_and_fixdep,cc_o_c) \ - $(cmd_modversions) \ + $(cmd_modversions_c) \ $(cmd_objtool) \ $(call echo-cmd,record_mcount) $(cmd_record_mcount) endef define rule_as_o_S $(call cmd_and_fixdep,as_o_S) \ + $(cmd_modversions_S) \ $(cmd_objtool) endef @@ -314,6 +317,39 @@ modkern_aflags := $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL) $(real-objs-m) : modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) $(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE) +# .S file exports must have their C prototypes defined in asm/asm-prototypes.h +# or a file that it includes, in order to get versioned symbols. We build a +# dummy C file that includes asm-prototypes and the EXPORT_SYMBOL lines from +# the .S file (with trailing ';'), and run genksyms on that, to extract vers. +# +# This is convoluted. The .S file must first be preprocessed to run guards and +# expand names, then the resulting exports must be constructed into plain +# EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed +# to make the genksyms input. +# +# These mirror gensymtypes_c and co above, keep them in synch. +cmd_gensymtypes_S = \ + (echo "\#include " ; \ + echo "\#include " ; \ + $(CPP) $(a_flags) $< | \ + grep ^___EXPORT_SYMBOL | \ + sed 's/___EXPORT_SYMBOL \([a-zA-Z0-9_]*\),.*/EXPORT_SYMBOL(\1);/' ) | \ + $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \ + $(GENKSYMS) $(if $(1), -T $(2)) \ + $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ + $(if $(KBUILD_PRESERVE),-p) \ + -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null)) + +quiet_cmd_cc_symtypes_S = SYM $(quiet_modtag) $@ +cmd_cc_symtypes_S = \ + set -e; \ + $(call cmd_gensymtypes_S,true,$@) >/dev/null; \ + test -s $@ || rm -f $@ + +$(obj)/%.symtypes : $(src)/%.S FORCE + $(call cmd,cc_symtypes_S) + + quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@ cmd_cpp_s_S = $(CPP) $(a_flags) -o $@ $< @@ -321,7 +357,37 @@ $(obj)/%.s: $(src)/%.S FORCE $(call if_changed_dep,cpp_s_S) quiet_cmd_as_o_S = AS $(quiet_modtag) $@ -cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +ifndef CONFIG_MODVERSIONS +cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +else + +ASM_PROTOTYPES := $(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/asm-prototypes.h) + +ifeq ($(ASM_PROTOTYPES),) +cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< + +else + +# versioning matches the C process described above, with difference that +# we parse asm-prototypes.h C header to get function definitions. + +cmd_as_o_S = $(CC) $(a_flags) -c -o $(@D)/.tmp_$(@F) $< + +cmd_modversions_S = \ + if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_S,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $(@D)/.tmp_$(@F:.o=.ver); \ + \ + $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ + -T $(@D)/.tmp_$(@F:.o=.ver); \ + rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \ + else \ + mv -f $(@D)/.tmp_$(@F) $@; \ + fi; +endif +endif $(obj)/%.o: $(src)/%.S $(objtool_obj) FORCE $(call if_changed_rule,as_o_S) -- cgit v1.2.3-59-g8ed1b From 6c08d7ab23dd07c046e8de1520073053bdc76ae2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 30 Oct 2016 09:49:26 +0100 Subject: drm/sun4i: Fix error handling 'sun4i_layers_init()' returns an error pointer in case of error, not NULL. So test it with IS_ERR. Signed-off-by: Christophe JAILLET Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 0da9862ad8ed..077f3785439e 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -142,7 +142,7 @@ static int sun4i_drv_bind(struct device *dev) /* Create our layers */ drv->layers = sun4i_layers_init(drm); - if (!drv->layers) { + if (IS_ERR(drv->layers)) { dev_err(drm->dev, "Couldn't create the planes\n"); ret = -EINVAL; goto free_drm; -- cgit v1.2.3-59-g8ed1b From cfbd950d5e6e649c6c1a88925feada64f890c894 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 20 Oct 2016 08:46:32 +0200 Subject: video: ARM CLCD: fix Vexpress regression The CLCD does not come up on Versatile Express as it does not (currently) have a syscon node for controlling the block apart from the CLCD itself. Make sure the .init() function can bail out without an error making it probe again. Reported-by: Amit Pundir Signed-off-by: Linus Walleij Tested-by: Amit Pundir Tested-by: Nicolae Rosia Signed-off-by: Tomi Valkeinen --- drivers/video/fbdev/amba-clcd-versatile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/amba-clcd-versatile.c b/drivers/video/fbdev/amba-clcd-versatile.c index 19ad8645d93c..e5d9bfc1703a 100644 --- a/drivers/video/fbdev/amba-clcd-versatile.c +++ b/drivers/video/fbdev/amba-clcd-versatile.c @@ -526,8 +526,8 @@ int versatile_clcd_init_panel(struct clcd_fb *fb, np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, &clcd_id); if (!np) { - dev_err(dev, "no Versatile syscon node\n"); - return -ENODEV; + /* Vexpress does not have this */ + return 0; } versatile_clcd_type = (enum versatile_clcd)clcd_id->data; -- cgit v1.2.3-59-g8ed1b From 27915aa61060fd8954a68a86657784705955088a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 2 Nov 2016 18:14:43 +0100 Subject: batman-adv: Revert "fix splat on disabling an interface" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 9799c50372b2 ("batman-adv: fix splat on disabling an interface") fixed a warning but at the same time broke the rtnl function add_slave for devices which were temporarily removed. batadv_softif_slave_add requires soft_iface of and hard_iface to be NULL before it is allowed to be enslaved. But this resetting of soft_iface to NULL in batadv_hardif_disable_interface was removed with the aforementioned commit. Reported-by: Julian Labus Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e034afbd1bb0..08ce36147c4c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -652,6 +652,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_softif_destroy_sysfs(hard_iface->soft_iface); } + hard_iface->soft_iface = NULL; batadv_hardif_put(hard_iface); out: -- cgit v1.2.3-59-g8ed1b From e13258f38e927b61cdb5f4ad25309450d3b127d1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 29 Oct 2016 09:18:43 +0200 Subject: batman-adv: Detect missing primaryif during tp_send as error The throughput meter detects different situations as problems for the current test. It stops the test after these and reports it to userspace. This also has to be done when the primary interface disappeared during the test. Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Reported-by: Joe Perches Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/tp_meter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index 2333777f919d..8af1611b8ab2 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -837,6 +837,7 @@ static int batadv_tp_send(void *arg) primary_if = batadv_primary_if_get_selected(bat_priv); if (unlikely(!primary_if)) { err = BATADV_TP_REASON_DST_UNREACHABLE; + tp_vars->reason = err; goto out; } -- cgit v1.2.3-59-g8ed1b From e3c9d9d6ebfeeeee29c6240e1b5978d40d31d21f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 27 Oct 2016 13:06:44 -0200 Subject: ARM: dts: imx53-qsb: Fix regulator constraints Since commit fa93fd4ecc9c ("regulator: core: Ensure we are at least in bounds for our constraints") the imx53-qsb board populated with a Dialog DA9053 PMIC fails to boot: LDO3: Bringing 3300000uV into 1800000-1800000uV The LDO3 voltage constraints passed in the device tree do not match the valid range according to the datasheet, so fix this accordingly to allow the board booting again. While at it, fix the other voltage constraints as well. Cc: # 4.7.x Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx53-qsb.dts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm/boot/dts/imx53-qsb.dts b/arch/arm/boot/dts/imx53-qsb.dts index dec4b073ceb1..379939699164 100644 --- a/arch/arm/boot/dts/imx53-qsb.dts +++ b/arch/arm/boot/dts/imx53-qsb.dts @@ -64,8 +64,8 @@ }; ldo3_reg: ldo3 { - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <1725000>; + regulator-max-microvolt = <3300000>; regulator-always-on; }; @@ -76,8 +76,8 @@ }; ldo5_reg: ldo5 { - regulator-min-microvolt = <1725000>; - regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; regulator-always-on; }; @@ -100,14 +100,14 @@ }; ldo9_reg: ldo9 { - regulator-min-microvolt = <1200000>; + regulator-min-microvolt = <1250000>; regulator-max-microvolt = <3600000>; regulator-always-on; }; ldo10_reg: ldo10 { - regulator-min-microvolt = <1250000>; - regulator-max-microvolt = <3650000>; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; regulator-always-on; }; }; -- cgit v1.2.3-59-g8ed1b From d4eccafcaf339de77ec562e96e6b223d447f924a Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 4 Nov 2016 14:45:08 -0700 Subject: xtensa: clean up printk usage for boot/crash logging Convert printk(KERN_* to pr_* and printk's without level to pr_cont. This fixes torn register dumps, stack dumps, stack traces and timestamps in the middle of 'Calibrating CPU frequency' message. Also drop unused show_code and drop false comment about show_stack. Signed-off-by: Max Filippov --- arch/xtensa/kernel/time.c | 14 ++++----- arch/xtensa/kernel/traps.c | 74 ++++++++++++++-------------------------------- 2 files changed, 29 insertions(+), 59 deletions(-) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 9a5bcd0381a7..be81e69b25bc 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -172,10 +172,11 @@ void __init time_init(void) { of_clk_init(NULL); #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT - printk("Calibrating CPU frequency "); + pr_info("Calibrating CPU frequency "); calibrate_ccount(); - printk("%d.%02d MHz\n", (int)ccount_freq/1000000, - (int)(ccount_freq/10000)%100); + pr_cont("%d.%02d MHz\n", + (int)ccount_freq / 1000000, + (int)(ccount_freq / 10000) % 100); #else ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL; #endif @@ -210,9 +211,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) void calibrate_delay(void) { loops_per_jiffy = ccount_freq / HZ; - printk("Calibrating delay loop (skipped)... " - "%lu.%02lu BogoMIPS preset\n", - loops_per_jiffy/(1000000/HZ), - (loops_per_jiffy/(10000/HZ)) % 100); + pr_info("Calibrating delay loop (skipped)... %lu.%02lu BogoMIPS preset\n", + loops_per_jiffy / (1000000 / HZ), + (loops_per_jiffy / (10000 / HZ)) % 100); } #endif diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index d02fc304b31c..ce37d5b899fe 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -465,26 +465,25 @@ void show_regs(struct pt_regs * regs) for (i = 0; i < 16; i++) { if ((i % 8) == 0) - printk(KERN_INFO "a%02d:", i); - printk(KERN_CONT " %08lx", regs->areg[i]); + pr_info("a%02d:", i); + pr_cont(" %08lx", regs->areg[i]); } - printk(KERN_CONT "\n"); - - printk("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n", - regs->pc, regs->ps, regs->depc, regs->excvaddr); - printk("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n", - regs->lbeg, regs->lend, regs->lcount, regs->sar); + pr_cont("\n"); + pr_info("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n", + regs->pc, regs->ps, regs->depc, regs->excvaddr); + pr_info("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n", + regs->lbeg, regs->lend, regs->lcount, regs->sar); if (user_mode(regs)) - printk("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n", - regs->windowbase, regs->windowstart, regs->wmask, - regs->syscall); + pr_cont("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n", + regs->windowbase, regs->windowstart, regs->wmask, + regs->syscall); } static int show_trace_cb(struct stackframe *frame, void *data) { if (kernel_text_address(frame->pc)) { - printk(" [<%08lx>] ", frame->pc); - print_symbol("%s\n", frame->pc); + pr_cont(" [<%08lx>]", frame->pc); + print_symbol(" %s\n", frame->pc); } return 0; } @@ -494,19 +493,13 @@ void show_trace(struct task_struct *task, unsigned long *sp) if (!sp) sp = stack_pointer(task); - printk("Call Trace:"); -#ifdef CONFIG_KALLSYMS - printk("\n"); -#endif + pr_info("Call Trace:\n"); walk_stackframe(sp, show_trace_cb, NULL); - printk("\n"); +#ifndef CONFIG_KALLSYMS + pr_cont("\n"); +#endif } -/* - * This routine abuses get_user()/put_user() to reference pointers - * with at least a bit of error checking ... - */ - static int kstack_depth_to_print = 24; void show_stack(struct task_struct *task, unsigned long *sp) @@ -518,52 +511,29 @@ void show_stack(struct task_struct *task, unsigned long *sp) sp = stack_pointer(task); stack = sp; - printk("\nStack: "); + pr_info("Stack:\n"); for (i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(sp)) break; - if (i && ((i % 8) == 0)) - printk("\n "); - printk("%08lx ", *sp++); + pr_cont(" %08lx", *sp++); + if (i % 8 == 7) + pr_cont("\n"); } - printk("\n"); show_trace(task, stack); } -void show_code(unsigned int *pc) -{ - long i; - - printk("\nCode:"); - - for(i = -3 ; i < 6 ; i++) { - unsigned long insn; - if (__get_user(insn, pc + i)) { - printk(" (Bad address in pc)\n"); - break; - } - printk("%c%08lx%c",(i?' ':'<'),insn,(i?' ':'>')); - } -} - DEFINE_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) { static int die_counter; - int nl = 0; console_verbose(); spin_lock_irq(&die_lock); - printk("%s: sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); - nl = 1; -#endif - if (nl) - printk("\n"); + pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, + IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : ""); show_regs(regs); if (!user_mode(regs)) show_stack(NULL, (unsigned long*)regs->areg[1]); -- cgit v1.2.3-59-g8ed1b From 4db069a2bf990e278ea57ff615dcaa89b85376bd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 4 Nov 2016 07:13:52 +0100 Subject: drm/sun4i: Propagate error to the caller If 'sun4i_layers_init()' returns an error, propagate it instead of returning -EINVAL unconditionally. Signed-off-by: Christophe JAILLET Reviewed-by: Gustavo Padovan Signed-off-by: Maxime Ripard --- drivers/gpu/drm/sun4i/sun4i_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 077f3785439e..70e9fd59c5a2 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -144,7 +144,7 @@ static int sun4i_drv_bind(struct device *dev) drv->layers = sun4i_layers_init(drm); if (IS_ERR(drv->layers)) { dev_err(drm->dev, "Couldn't create the planes\n"); - ret = -EINVAL; + ret = PTR_ERR(drv->layers); goto free_drm; } -- cgit v1.2.3-59-g8ed1b From 85566ca6b6d7e131837cd197a441d98e83146fae Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 7 Nov 2016 14:52:39 -0700 Subject: ARM: OMAP3: Fix formatting of features printed With the printk cleanups merged into v4.9-rc1, we now get the omap revision printed on multiple lines. Let's fix that and also remove the extra empty space at the end of the features. And let's update things to use scnprintf as suggested by Ivaylo Dimitrov . Reported-by: Adam Ford Cc: Ivaylo Dimitrov Reviewed-by: Sebastian Reichel Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/id.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 2abd53ae3e7a..cc6d9fa60924 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -205,11 +205,15 @@ void __init omap2xxx_check_revision(void) #define OMAP3_SHOW_FEATURE(feat) \ if (omap3_has_ ##feat()) \ - printk(#feat" "); + n += scnprintf(buf + n, sizeof(buf) - n, #feat " "); static void __init omap3_cpuinfo(void) { const char *cpu_name; + char buf[64]; + int n = 0; + + memset(buf, 0, sizeof(buf)); /* * OMAP3430 and OMAP3530 are assumed to be same. @@ -241,10 +245,10 @@ static void __init omap3_cpuinfo(void) cpu_name = "OMAP3503"; } - sprintf(soc_name, "%s", cpu_name); + scnprintf(soc_name, sizeof(soc_name), "%s", cpu_name); /* Print verbose information */ - pr_info("%s %s (", soc_name, soc_rev); + n += scnprintf(buf, sizeof(buf) - n, "%s %s (", soc_name, soc_rev); OMAP3_SHOW_FEATURE(l2cache); OMAP3_SHOW_FEATURE(iva); @@ -252,8 +256,10 @@ static void __init omap3_cpuinfo(void) OMAP3_SHOW_FEATURE(neon); OMAP3_SHOW_FEATURE(isp); OMAP3_SHOW_FEATURE(192mhz_clk); - - printk(")\n"); + if (*(buf + n - 1) == ' ') + n--; + n += scnprintf(buf + n, sizeof(buf) - n, ")\n"); + pr_info("%s", buf); } #define OMAP3_CHECK_FEATURE(status,feat) \ -- cgit v1.2.3-59-g8ed1b From 72bb40b8b7620f1390c84c10309a40e886bf449e Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 19 Oct 2016 15:44:12 -0500 Subject: ARM: AM43XX: Select OMAP_INTERCONNECT in Kconfig AM437x makes use of the omap_l3_noc driver so explicitly select OMAP_INTERCONNECT in the Kconfig for SOC_AM43XX to ensure it gets enabled for AM43XX only builds. Signed-off-by: Dave Gerlach Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index a9afeebd59f2..0465338183c7 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -71,6 +71,7 @@ config SOC_AM43XX select HAVE_ARM_TWD select ARM_ERRATA_754322 select ARM_ERRATA_775420 + select OMAP_INTERCONNECT config SOC_DRA7XX bool "TI DRA7XX" -- cgit v1.2.3-59-g8ed1b From 271a3024db1f32ca34f504178fade6ef95cd6c9b Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 21 Oct 2016 09:12:31 -0500 Subject: ARM: dts: omap3: Fix memory node in Torpedo board Commit ("766a1fe78fc3 ARM: omap3: Add missing memory node") added the memory node, but the patch didn't have the correct starting address. This patch fixes the correct starting address. Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index 731ec37aed5b..8f9a69ca818c 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -13,9 +13,9 @@ }; }; - memory@0 { + memory@80000000 { device_type = "memory"; - reg = <0 0>; + reg = <0x80000000 0>; }; leds { -- cgit v1.2.3-59-g8ed1b From 4ae46efcff19445afbf49fe7038de6020f37fefe Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2016 12:00:21 +0100 Subject: ARM: OMAP2+: PRM: initialize en_uart4_mask and grpsel_uart4_mask In the case where has_uart4 is false, en_uart4_mask and grpsel_uart4_mask are not initialized and so any garbage value is being logically or'd into the write of PM_WKEN and OMAP3430_PM_MPUGRPSEL. Fix this by initializing these masks to zero. Signed-off-by: Colin Ian King Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/prm3xxx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 62680aad2126..718981bb80cd 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -319,6 +319,9 @@ void __init omap3_prm_init_pm(bool has_uart4, bool has_iva) if (has_uart4) { en_uart4_mask = OMAP3630_EN_UART4_MASK; grpsel_uart4_mask = OMAP3630_GRPSEL_UART4_MASK; + } else { + en_uart4_mask = 0; + grpsel_uart4_mask = 0; } /* Enable wakeups in PER */ -- cgit v1.2.3-59-g8ed1b From 0ab11d8ea46fd6faf67df4461c795091429a1496 Mon Sep 17 00:00:00 2001 From: Nicolae Rosia Date: Tue, 1 Nov 2016 11:49:25 +0200 Subject: ARM: OMAP2+: avoid NULL pointer dereference For OMAP4, volt_data is set in omap44xx_voltagedomains_init. If the SoC is neither OMAP443X or OMAP446X, we end up with a NULL in volt_data which causes a kernel oops. This is the case when booting OMAP4470. Signed-off-by: Nicolae Rosia Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/voltage.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mach-omap2/voltage.c b/arch/arm/mach-omap2/voltage.c index cba8cada8c81..cd15dbd62671 100644 --- a/arch/arm/mach-omap2/voltage.c +++ b/arch/arm/mach-omap2/voltage.c @@ -87,6 +87,12 @@ int voltdm_scale(struct voltagedomain *voltdm, return -ENODATA; } + if (!voltdm->volt_data) { + pr_err("%s: No voltage data defined for vdd_%s\n", + __func__, voltdm->name); + return -ENODATA; + } + /* Adjust voltage to the exact voltage from the OPP table */ for (i = 0; voltdm->volt_data[i].volt_nominal != 0; i++) { if (voltdm->volt_data[i].volt_nominal >= target_volt) { -- cgit v1.2.3-59-g8ed1b From 725ed2238cdb3807c19e7edcb20fde8d0f91597f Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:09 +0200 Subject: dts: omap5: board-common: add phandle to reference Palmas gpadc Will be needed for iio based drivers. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 6365635fea5c..7cd1c674ceec 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -415,7 +415,7 @@ ti,backup-battery-charge-high-current; }; - gpadc { + gpadc: gpadc { compatible = "ti,palmas-gpadc"; interrupts = <18 0 16 0 -- cgit v1.2.3-59-g8ed1b From 0b68f1beea9ed2b31ff7873d5ed0cfbd087da0eb Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:10 +0200 Subject: dts: omap5: board-common: enable twl6040 headset jack detection Signed-off-by: H. Nikolaus Schaller Reviewed-by: Peter Ujfalusi Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7cd1c674ceec..60a33c4b7b82 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -124,6 +124,7 @@ compatible = "ti,abe-twl6040"; ti,model = "omap5-uevm"; + ti,jack-detection; ti,mclk-freq = <19200000>; ti,mcpdm = <&mcpdm>; -- cgit v1.2.3-59-g8ed1b From 1219e3db7ecb59ab269e9c8b1a199d82b8d088bb Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Tue, 25 Oct 2016 19:38:11 +0200 Subject: ASoC: omap-abe-twl6040: fix typo in bindings documentation Signed-off-by: H. Nikolaus Schaller Acked-by: Peter Ujfalusi Acked-by: Rob Herring Signed-off-by: Tony Lindgren --- Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt index fd40c852d7c7..462b04e8209f 100644 --- a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt +++ b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt @@ -12,7 +12,7 @@ Required properties: Optional properties: - ti,dmic: phandle for the OMAP dmic node if the machine have it connected -- ti,jack_detection: Need to be present if the board capable to detect jack +- ti,jack-detection: Need to be present if the board capable to detect jack insertion, removal. Available audio endpoints for the audio-routing table: -- cgit v1.2.3-59-g8ed1b From 53f8d322234649b4d6f1515b20c127a577efd164 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 8 Nov 2016 14:00:45 +0800 Subject: gpio: pca953x: Fix corruption of other gpios in set_multiple. gpiod_set_array_value_complex does not clear the bits field. Therefore when the drivers set_multiple funciton is called bits outside the mask are undefined and can be either set or not. So bank_val needs to be masked with bank_mask before or with the reg_val cache. Cc: stable@vger.kernel.org Fixes: b4818afeacbd ("gpio: pca953x: Add set_multiple to allow multiple") Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index e422568e14ad..4a8d0fe60e0c 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -380,6 +380,7 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, if (bank_mask) { bank_val = bits[bank / sizeof(*bits)] >> ((bank % sizeof(*bits)) * 8); + bank_val &= bank_mask; reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val; } } -- cgit v1.2.3-59-g8ed1b From 386377b5473043c09b2de40bfe5abfb0fc87e1b4 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 8 Nov 2016 13:18:11 +0800 Subject: gpio: pca953x: Move memcpy into mutex lock for set multiple Need to ensure that reg_output is not updated while setting multiple bits. This makes the mutex locking behaviour for the set_multiple call consistent with that of the set_value call. Cc: stable@vger.kernel.org Fixes: b4818afeacbd ("gpio: pca953x: Add set_multiple to allow multiple") Signed-off-by: Phil Reid Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 4a8d0fe60e0c..fe731f094257 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -372,8 +372,8 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc, bank_shift = fls((chip->gpio_chip.ngpio - 1) / BANK_SZ); - memcpy(reg_val, chip->reg_output, NBANK(chip)); mutex_lock(&chip->i2c_lock); + memcpy(reg_val, chip->reg_output, NBANK(chip)); for (bank = 0; bank < NBANK(chip); bank++) { bank_mask = mask[bank / sizeof(*mask)] >> ((bank % sizeof(*mask)) * 8); @@ -608,7 +608,6 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, if (client->irq && irq_base != -1 && (chip->driver_data & PCA_INT)) { - ret = pca953x_read_regs(chip, chip->regs->input, chip->irq_stat); if (ret) -- cgit v1.2.3-59-g8ed1b From 8ae94224c9d72fc4d9aaac93b2d7833cf46d7141 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:38 +0100 Subject: kbuild: add -fno-PIE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Debian started to build the gcc with -fPIE by default so the kernel build ends before it starts properly with: |kernel/bounds.c:1:0: error: code model kernel does not support PIC mode Also add to KBUILD_AFLAGS due to: |gcc -Wp,-MD,arch/x86/entry/vdso/vdso32/.note.o.d … -mfentry -DCC_USING_FENTRY … vdso/vdso32/note.S |arch/x86/entry/vdso/vdso32/note.S:1:0: sorry, unimplemented: -mfentry isn’t supported for 32-bit in combination with -fpic Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 512e47a53e9a..58fc5d935ce6 100644 --- a/Makefile +++ b/Makefile @@ -622,6 +622,8 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) +KBUILD_AFLAGS += $(call cc-option,-fno-PIE) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) -- cgit v1.2.3-59-g8ed1b From 34a515d27c6573b6f550877b30dd5e0f440c3d8f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 4 Jan 2016 16:34:22 -0800 Subject: drm/fsl-dcu: do not update when modifying irq registers The IRQ status and mask registers are not "double buffered" according to the reference manual. Hence, there is no extra transfer/update write needed when modifying these registers. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index e04efbed1a54..cc2fde2ae5ef 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -59,8 +59,6 @@ static int fsl_dcu_drm_irq_init(struct drm_device *dev) regmap_write(fsl_dev->regmap, DCU_INT_STATUS, 0); regmap_write(fsl_dev->regmap, DCU_INT_MASK, ~0); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); return ret; } @@ -139,8 +137,6 @@ static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg) drm_handle_vblank(dev, 0); regmap_write(fsl_dev->regmap, DCU_INT_STATUS, int_status); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); return IRQ_HANDLED; } -- cgit v1.2.3-59-g8ed1b From 93daeeca2c9472a47d419884a64f6ca2b7f006e4 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 5 Sep 2016 19:05:12 -0700 Subject: drm/fsl-dcu: update all registers on flush Use the UPDATE_MODE READREG bit to initiate a register transfer on flush. This makes sure that we flush all registers only once for all planes. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 5 +++++ drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index b2d5e188b1b8..2ea9dbd9be30 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -25,8 +25,13 @@ static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { + struct drm_device *dev = crtc->dev; + struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; struct drm_pending_vblank_event *event = crtc->state->event; + regmap_write(fsl_dev->regmap, + DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG); + if (event) { crtc->state->event = NULL; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c index 9e6f7d8112b3..a99f48847420 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c @@ -160,11 +160,6 @@ static void fsl_dcu_drm_plane_atomic_update(struct drm_plane *plane, DCU_LAYER_POST_SKIP(0) | DCU_LAYER_PRE_SKIP(0)); } - regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, - DCU_MODE_DCU_MODE_MASK, - DCU_MODE_DCU_MODE(DCU_MODE_NORMAL)); - regmap_write(fsl_dev->regmap, - DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG); return; } -- cgit v1.2.3-59-g8ed1b From 3d6f37102bd6e4b55a7f336d44974c0bd1c22a15 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 31 Oct 2016 09:51:19 -0700 Subject: drm/fsl-dcu: disable planes before disabling CRTC After disabling and reenabling the CRTC the DCU sometimes got stuck displaying the whole screen with a solid color. Disabling and reenabling the CRTC did not recover from the situation. This was often reproducable by just restarting the X-Server. The disabling sequence is not explicitly documented. But it turns out that disabling the planes before disabling the CRTC seems to prevent the above situation from happening. Use the callback ->atomic_disable instead of ->disable which allows to use the drm_atomic_helper_disable_planes_on_crtc() helper to disable planes before disabling the controller. Signed-off-by: Stefan Agner --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index 2ea9dbd9be30..deb57435cc89 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -44,11 +44,15 @@ static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc, } } -static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc) +static void fsl_dcu_drm_crtc_atomic_disable(struct drm_crtc *crtc, + struct drm_crtc_state *old_crtc_state) { struct drm_device *dev = crtc->dev; struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; + /* always disable planes on the CRTC */ + drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true); + drm_crtc_vblank_off(crtc); regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, @@ -127,8 +131,8 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc) } static const struct drm_crtc_helper_funcs fsl_dcu_drm_crtc_helper_funcs = { + .atomic_disable = fsl_dcu_drm_crtc_atomic_disable, .atomic_flush = fsl_dcu_drm_crtc_atomic_flush, - .disable = fsl_dcu_drm_disable_crtc, .enable = fsl_dcu_drm_crtc_enable, .mode_set_nofb = fsl_dcu_drm_crtc_mode_set_nofb, }; -- cgit v1.2.3-59-g8ed1b From 29f0c9edbdd98a977a4c629f411260f6e0356c67 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Tue, 8 Nov 2016 17:28:02 +0100 Subject: arm64: dts: marvell: Fix typo in label name on Armada 37xx The label names of the peripheral clocks have a typo. Fix it before it is more widely used. Reported-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-37xx.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index c4762538ec01..e9bd58793464 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -105,7 +105,7 @@ status = "disabled"; }; - nb_perih_clk: nb-periph-clk@13000{ + nb_periph_clk: nb-periph-clk@13000 { compatible = "marvell,armada-3700-periph-clock-nb"; reg = <0x13000 0x100>; clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, @@ -113,7 +113,7 @@ #clock-cells = <1>; }; - sb_perih_clk: sb-periph-clk@18000{ + sb_periph_clk: sb-periph-clk@18000 { compatible = "marvell,armada-3700-periph-clock-sb"; reg = <0x18000 0x100>; clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>, -- cgit v1.2.3-59-g8ed1b From 2ec27be33898effa47fcb2cd45abb552a97fac89 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 6 Sep 2016 19:41:12 +0200 Subject: arm64: dts: marvell: fix clocksource for CP110 slave SPI0 I2C and SPI interfaces share common clock trees within the CP110 HW block. It occurred that SPI0 interface has wrong clock assignment in the device tree, which is fixed in this commit to a proper value. Fixes: c749b8d9de32 ("arm64: dts: marvell: add description for the ...") Signed-off-by: Marcin Wojtas Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 842fb333285c..565b3cb3d7ff 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -131,7 +131,7 @@ #address-cells = <0x1>; #size-cells = <0x0>; cell-index = <1>; - clocks = <&cps_syscon0 0 3>; + clocks = <&cps_syscon0 1 21>; status = "disabled"; }; -- cgit v1.2.3-59-g8ed1b From 8d897006fe9206d64cbe353310be26d7c911e69d Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 8 Nov 2016 17:31:32 +0100 Subject: arm64: dts: marvell: add unique identifiers for Armada A8k SPI controllers Enabling SPI controllers, which are attached to different busses inside an SoC, may result in overlapping enumeration and cause sysfs registration failure. Example log after enabling two controllers on Armada 8040 SoC with same identifiers: [ 3.740415] sysfs: cannot create duplicate filename '/class/spi_master/spi0' [ 3.747510] ------------[ cut here ]------------ [ 3.752145] WARNING: at fs/sysfs/dir.c:31 [...] [ 4.002299] orion_spi: probe of f4700600.spi failed with error -17 spi-orion driver offers dedicated DT property ('cell-index'), that allow setting unique identifiers. Recently added support for CP110-slave HW block introduced two new SPI controllers' nodes with same ID as ones from CP110-master. This commit fixes the issue by assigning different 'cell-index' values for CP110-slave SPI controllers. Fixes: 4eef78a0091b ("arm64: dts: marvell: add description for the slave CP110 in Armada 8K") Signed-off-by: Marcin Wojtas Acked-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 565b3cb3d7ff..6bf9e241179b 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -130,7 +130,7 @@ reg = <0x700600 0x50>; #address-cells = <0x1>; #size-cells = <0x0>; - cell-index = <1>; + cell-index = <3>; clocks = <&cps_syscon0 1 21>; status = "disabled"; }; @@ -140,7 +140,7 @@ reg = <0x700680 0x50>; #address-cells = <1>; #size-cells = <0>; - cell-index = <2>; + cell-index = <4>; clocks = <&cps_syscon0 1 21>; status = "disabled"; }; -- cgit v1.2.3-59-g8ed1b From 82031ea29e454b574bc6f49a33683a693ca5d907 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:39 +0100 Subject: scripts/has-stack-protector: add -fno-PIE Adding -no-PIE to the fstack protector check. -no-PIE was introduced before -fstack-protector so there is no need for a runtime check. Without it the build stops: |Cannot use CONFIG_CC_STACKPROTECTOR_STRONG: -fstack-protector-strong available but compiler is broken due to -mcmodel=kernel + -fPIE if -fPIE is enabled by default. Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- scripts/gcc-x86_64-has-stack-protector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 973e8c141567..17867e723a51 100755 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,6 +1,6 @@ #!/bin/sh -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" if [ "$?" -eq "0" ] ; then echo y else -- cgit v1.2.3-59-g8ed1b From 90944e40ba1838de4b2a9290cf273f9d76bd3bdd Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 Nov 2016 19:39:40 +0100 Subject: x86/kexec: add -fno-PIE If the gcc is configured to do -fPIE by default then the build aborts later with: | Unsupported relocation type: unknown type rel type name (29) Tagging it stable so it is possible to compile recent stable kernels as well. Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index ac58c1616408..555b9fa0ad43 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -16,6 +16,7 @@ KCOV_INSTRUMENT := n KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large KBUILD_CFLAGS += -m$(BITS) +KBUILD_CFLAGS += $(call cc-option,-fno-PIE) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE $(call if_changed,ld) -- cgit v1.2.3-59-g8ed1b From cc6acc11cad1eb1ae39707a3a6e4a97fafbeeabd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 9 Nov 2016 15:34:05 +1100 Subject: kbuild: be more careful about matching preprocessed asm ___EXPORT_SYMBOL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CRC code for asm exports grabs the preprocessed asm, finds the ___EXPORT_SYMBOL and turns those into EXPORT_SYMBOL in a C program that can be preprocessed and parsed to create the CRC signatures from the type. The existing regex matching and replacement is too strict, and doesn't deal well with whitespace among other things. The line " EXPORT_SYMBOL(sym)" in a .S file would not match due to initial whitespace, for example, which resulted in x86's ___preempt_schedule failing to get CRCs. Reported-by: Philip Müller Signed-off-by: Nicholas Piggin Signed-off-by: Michal Marek --- scripts/Makefile.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 3e223c264469..7675d11ee65e 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -332,8 +332,8 @@ cmd_gensymtypes_S = \ (echo "\#include " ; \ echo "\#include " ; \ $(CPP) $(a_flags) $< | \ - grep ^___EXPORT_SYMBOL | \ - sed 's/___EXPORT_SYMBOL \([a-zA-Z0-9_]*\),.*/EXPORT_SYMBOL(\1);/' ) | \ + grep "\<___EXPORT_SYMBOL\>" | \ + sed 's/.*___EXPORT_SYMBOL[[:space:]]*\([a-zA-Z0-9_]*\)[[:space:]]*,.*/EXPORT_SYMBOL(\1);/' ) | \ $(CPP) -D__GENKSYMS__ $(c_flags) -xc - | \ $(GENKSYMS) $(if $(1), -T $(2)) \ $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX)) \ -- cgit v1.2.3-59-g8ed1b From 80513a2b9f0448eadd10ae81a42229b33ef451fb Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Fri, 21 Oct 2016 08:34:59 -0500 Subject: ARM: omap3: Add missing memory node in SOM-LV The skeleton.dtsi file was removed in ARM64 for different reasons as explained in commit ("3ebee5a2e141 arm64: dts: kill skeleton.dtsi"). commit ("766a1fe78fc3 ARM: omap3: Add missing memory node") had fixes for Torpedo and Overo boards, but this SOM-LV was missed. This should help prevent the DTC warning: "Node /memory has a reg or ranges property, but no unit name" Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index 0ff1c2de95bf..26cce4d18405 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -13,6 +13,11 @@ }; }; + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0>; + }; + wl12xx_vmmc: wl12xx_vmmc { compatible = "regulator-fixed"; regulator-name = "vwl1271"; -- cgit v1.2.3-59-g8ed1b From 2ecb704a1290edb5e3d53a75529192e7ed2a1a28 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 10 Nov 2016 13:20:05 +0800 Subject: ALSA: hda - add a new condition to check if it is thinkpad Latest Thinkpad laptops use the HKEY_HID LEN0268 instead of the LEN0068, as a result neither audio mute led nor mic mute led can work any more. After adding the new HKEY_HID into the is_thinkpad(), both of them works well as before. Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/thinkpad_helper.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c index 6a23302297c9..4d9d320a7971 100644 --- a/sound/pci/hda/thinkpad_helper.c +++ b/sound/pci/hda/thinkpad_helper.c @@ -13,7 +13,8 @@ static void (*old_vmaster_hook)(void *, int); static bool is_thinkpad(struct hda_codec *codec) { return (codec->core.subsystem_id >> 16 == 0x17aa) && - (acpi_dev_found("LEN0068") || acpi_dev_found("IBM0068")); + (acpi_dev_found("LEN0068") || acpi_dev_found("LEN0268") || + acpi_dev_found("IBM0068")); } static void update_tpacpi_mute_led(void *private_data, int enabled) -- cgit v1.2.3-59-g8ed1b From d052db11c153cfb469f13a4121966f30ecb57c66 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 6 Nov 2016 21:20:32 +0100 Subject: i2c: mux: demux-pinctrl: make drivers with no pinctrl work again Some drivers like i2c-gpio do not have dedicated pinctrl states. They broke when error checking for pinctrl was added. Detect them now, and in their case, simply skip over pinctrl configuration. Signed-off-by: Wolfram Sang Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index b3893f6282ba..3e6fe1760d82 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -69,10 +69,28 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne goto err_with_revert; } - p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name); + /* + * Check if there are pinctrl states at all. Note: we cant' use + * devm_pinctrl_get_select() because we need to distinguish between + * the -ENODEV from devm_pinctrl_get() and pinctrl_lookup_state(). + */ + p = devm_pinctrl_get(adap->dev.parent); if (IS_ERR(p)) { ret = PTR_ERR(p); - goto err_with_put; + /* continue if just no pinctrl states (e.g. i2c-gpio), otherwise exit */ + if (ret != -ENODEV) + goto err_with_put; + } else { + /* there are states. check and use them */ + struct pinctrl_state *s = pinctrl_lookup_state(p, priv->bus_name); + + if (IS_ERR(s)) { + ret = PTR_ERR(s); + goto err_with_put; + } + ret = pinctrl_select_state(p, s); + if (ret < 0) + goto err_with_put; } priv->chan[new_chan].parent_adap = adap; -- cgit v1.2.3-59-g8ed1b From f10a59eb8c1087f0ce03cf0392cd483922187066 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 10 Nov 2016 15:03:21 +0100 Subject: i2c: Documentation: i2c-topology: fix minor whitespace nit Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- Documentation/i2c/i2c-topology | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/i2c/i2c-topology b/Documentation/i2c/i2c-topology index e0aefeece551..1a014fede0b7 100644 --- a/Documentation/i2c/i2c-topology +++ b/Documentation/i2c/i2c-topology @@ -326,7 +326,7 @@ Two parent-locked sibling muxes This is a good topology. - .--------. + .--------. .----------. .--| dev D1 | | parent- |--' '--------' .--| locked | .--------. @@ -350,7 +350,7 @@ Mux-locked and parent-locked sibling muxes This is a good topology. - .--------. + .--------. .----------. .--| dev D1 | | mux- |--' '--------' .--| locked | .--------. -- cgit v1.2.3-59-g8ed1b From 74a5ed5c4f692df2ff0a2313ea71e81243525519 Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Thu, 3 Nov 2016 09:19:01 -0700 Subject: sparc64: Fix find_node warning if numa node cannot be found When booting up LDOM, find_node() warns that a physical address doesn't match a NUMA node. WARNING: CPU: 0 PID: 0 at arch/sparc/mm/init_64.c:835 find_node+0xf4/0x120 find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc3 #4 Call Trace: [0000000000468ba0] __warn+0xc0/0xe0 [0000000000468c74] warn_slowpath_fmt+0x34/0x60 [00000000004592f4] find_node+0xf4/0x120 [0000000000dd0774] add_node_ranges+0x38/0xe4 [0000000000dd0b1c] numa_parse_mdesc+0x268/0x2e4 [0000000000dd0e9c] bootmem_init+0xb8/0x160 [0000000000dd174c] paging_init+0x808/0x8fc [0000000000dcb0d0] setup_arch+0x2c8/0x2f0 [0000000000dc68a0] start_kernel+0x48/0x424 [0000000000dcb374] start_early_boot+0x27c/0x28c [0000000000a32c08] tlb_fixup_done+0x4c/0x64 [0000000000027f08] 0x27f08 It is because linux use an internal structure node_masks[] to keep the best memory latency node only. However, LDOM mdesc can contain single latency-group with multiple memory latency nodes. If the address doesn't match the best latency node within node_masks[], it should check for an alternative via mdesc. The warning message should only be printed if the address doesn't match any node_masks[] nor within mdesc. To minimize the impact of searching mdesc every time, the last matched mask and index is stored in a variable. Signed-off-by: Thomas Tai Reviewed-by: Chris Hyser Reviewed-by: Liam Merwick Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 4 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 439784b7b7ac..068eb3dcbcb5 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -802,6 +802,8 @@ struct mdesc_mblock { }; static struct mdesc_mblock *mblocks; static int num_mblocks; +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask); static unsigned long ra_to_pa(unsigned long addr) { @@ -821,6 +823,9 @@ static unsigned long ra_to_pa(unsigned long addr) static int find_node(unsigned long addr) { + static bool search_mdesc = true; + static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; + static int last_index; int i; addr = ra_to_pa(addr); @@ -830,10 +835,27 @@ static int find_node(unsigned long addr) if ((addr & p->mask) == p->val) return i; } - /* The following condition has been observed on LDOM guests.*/ - WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node" - " rule. Some physical memory will be owned by node 0."); - return 0; + /* The following condition has been observed on LDOM guests because + * node_masks only contains the best latency mask and value. + * LDOM guest's mdesc can contain a single latency group to + * cover multiple address range. Print warning message only if the + * address cannot be found in node_masks nor mdesc. + */ + if ((search_mdesc) && + ((addr & last_mem_mask.mask) != last_mem_mask.val)) { + /* find the available node in the mdesc */ + last_index = find_numa_node_for_addr(addr, &last_mem_mask); + numadbg("find_node: latency group for address 0x%lx is %d\n", + addr, last_index); + if ((last_index < 0) || (last_index >= num_node_masks)) { + /* WARN_ONCE() and use default group 0 */ + WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0."); + search_mdesc = false; + last_index = 0; + } + } + + return last_index; } static u64 memblock_nid_range(u64 start, u64 end, int *nid) @@ -1160,6 +1182,41 @@ int __node_distance(int from, int to) return numa_latency[from][to]; } +static int find_numa_node_for_addr(unsigned long pa, + struct node_mem_mask *pnode_mask) +{ + struct mdesc_handle *md = mdesc_grab(); + u64 node, arc; + int i = 0; + + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); + if (node == MDESC_NODE_NULL) + goto out; + + mdesc_for_each_node_by_name(md, node, "group") { + mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) { + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + if ((pa & m->mask) == m->match) { + if (pnode_mask) { + pnode_mask->mask = m->mask; + pnode_mask->val = m->match; + } + mdesc_release(md); + return i; + } + } + i++; + } + +out: + mdesc_release(md); + return -1; +} + static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) { int i; -- cgit v1.2.3-59-g8ed1b From 07b5ab3f71d318e52c18cc3b73c1d44c908aacfa Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Wed, 9 Nov 2016 10:43:05 +0100 Subject: sparc32: Fix inverted invalid_frame_pointer checks on sigreturns Signed-off-by: Andreas Larsson Signed-off-by: David S. Miller --- arch/sparc/kernel/signal_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index c3c12efe0bc0..9c0c8fd0b292 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) sf = (struct signal_frame __user *) regs->u_regs[UREG_FP]; /* 1. Make sure we are not getting garbage from the user */ - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv_and_exit; if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP])) @@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) synchronize_user_stack(); sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP]; - if (!invalid_frame_pointer(sf, sizeof(*sf))) + if (invalid_frame_pointer(sf, sizeof(*sf))) goto segv; if (get_user(ufp, &sf->regs.u_regs[UREG_FP])) -- cgit v1.2.3-59-g8ed1b From 7bc61cc5df808008b77a3b72cf814960c675518b Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 19 Oct 2016 10:46:18 +0300 Subject: drm/arcpgu: Accommodate adv7511 switch to DRM bridge ARC PGU driver starts crashing on initialization after 'commit e12c2f645557 ("drm/i2c: adv7511: Convert to drm_bridge")' This happenes because in "arcpgu_drm_hdmi_init" function we get pointer of "drm_i2c_encoder_driver" structure, which doesn't exist after adv7511 hdmi encoder interface changed from slave encoder to drm bridge. So, when we call "encoder_init" function from this structure driver crashes. Bootlog: ------------------------------------->8-------------------------------- [drm] Initialized drm 1.1.0 20060810 arcpgu e0017000.pgu: arc_pgu ID: 0xabbabaab arcpgu e0017000.pgu: assigned reserved memory node frame_buffer@9e000000 Path: (null) CPU: 0 PID: 1 Comm: swapper Not tainted 4.8.0-00001-gb5642252fa01-dirty #8 task: 9a058000 task.stack: 9a032000 [ECR ]: 0x00220100 => Invalid Read @ 0x00000004 by insn @ 0x803934e8 [EFA ]: 0x00000004 [BLINK ]: drm_atomic_helper_connector_dpms+0xa6/0x230 [ERET ]: drm_atomic_helper_connector_dpms+0xa4/0x230 [STAT32]: 0x00000846 : K DE E2 E1 BTA: 0x8016d949 SP: 0x9a033e34 FP: 0x00000000 LPS: 0x8036f6fc LPE: 0x8036f700 LPC: 0x00000000 r00: 0x8063c118 r01: 0x805b98ac r02: 0x00000b11 r03: 0x00000000 r04: 0x9a010f54 r05: 0x00000000 r06: 0x00000001 r07: 0x00000000 r08: 0x00000028 r09: 0x00000001 r10: 0x00000007 r11: 0x00000054 r12: 0x720a3033 Stack Trace: drm_atomic_helper_connector_dpms+0xa4/0x230 arcpgu_drm_hdmi_init+0xbc/0x228 arcpgu_probe+0x168/0x244 platform_drv_probe+0x26/0x64 really_probe+0x1f0/0x32c __driver_attach+0xa8/0xd0 bus_for_each_dev+0x3c/0x74 bus_add_driver+0xc2/0x184 driver_register+0x50/0xec do_one_initcall+0x3a/0x120 kernel_init_freeable+0x108/0x1a0 ------------------------------------->8-------------------------------- Fix ARC PGU driver to be able work with drm bridge hdmi encoder interface. The hdmi connector code isn't needed anymore as we expect the adv7511 bridge driver to create/manage the connector. Signed-off-by: Eugeniy Paltsev Reviewed-by: Archit Taneja Signed-off-by: Alexey Brodkin --- drivers/gpu/drm/arc/arcpgu_hdmi.c | 159 ++++---------------------------------- 1 file changed, 17 insertions(+), 142 deletions(-) diff --git a/drivers/gpu/drm/arc/arcpgu_hdmi.c b/drivers/gpu/drm/arc/arcpgu_hdmi.c index b7a8b2ac4055..b69c66b4897e 100644 --- a/drivers/gpu/drm/arc/arcpgu_hdmi.c +++ b/drivers/gpu/drm/arc/arcpgu_hdmi.c @@ -14,170 +14,45 @@ * */ -#include +#include #include -#include #include "arcpgu.h" -struct arcpgu_drm_connector { - struct drm_connector connector; - struct drm_encoder_slave *encoder_slave; -}; - -static int arcpgu_drm_connector_get_modes(struct drm_connector *connector) -{ - const struct drm_encoder_slave_funcs *sfuncs; - struct drm_encoder_slave *slave; - struct arcpgu_drm_connector *con = - container_of(connector, struct arcpgu_drm_connector, connector); - - slave = con->encoder_slave; - if (slave == NULL) { - dev_err(connector->dev->dev, - "connector_get_modes: cannot find slave encoder for connector\n"); - return 0; - } - - sfuncs = slave->slave_funcs; - if (sfuncs->get_modes == NULL) - return 0; - - return sfuncs->get_modes(&slave->base, connector); -} - -static enum drm_connector_status -arcpgu_drm_connector_detect(struct drm_connector *connector, bool force) -{ - enum drm_connector_status status = connector_status_unknown; - const struct drm_encoder_slave_funcs *sfuncs; - struct drm_encoder_slave *slave; - - struct arcpgu_drm_connector *con = - container_of(connector, struct arcpgu_drm_connector, connector); - - slave = con->encoder_slave; - if (slave == NULL) { - dev_err(connector->dev->dev, - "connector_detect: cannot find slave encoder for connector\n"); - return status; - } - - sfuncs = slave->slave_funcs; - if (sfuncs && sfuncs->detect) - return sfuncs->detect(&slave->base, connector); - - dev_err(connector->dev->dev, "connector_detect: could not detect slave funcs\n"); - return status; -} - -static void arcpgu_drm_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - -static const struct drm_connector_helper_funcs -arcpgu_drm_connector_helper_funcs = { - .get_modes = arcpgu_drm_connector_get_modes, -}; - -static const struct drm_connector_funcs arcpgu_drm_connector_funcs = { - .dpms = drm_helper_connector_dpms, - .reset = drm_atomic_helper_connector_reset, - .detect = arcpgu_drm_connector_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = arcpgu_drm_connector_destroy, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static struct drm_encoder_helper_funcs arcpgu_drm_encoder_helper_funcs = { - .dpms = drm_i2c_encoder_dpms, - .mode_fixup = drm_i2c_encoder_mode_fixup, - .mode_set = drm_i2c_encoder_mode_set, - .prepare = drm_i2c_encoder_prepare, - .commit = drm_i2c_encoder_commit, - .detect = drm_i2c_encoder_detect, -}; - static struct drm_encoder_funcs arcpgu_drm_encoder_funcs = { .destroy = drm_encoder_cleanup, }; int arcpgu_drm_hdmi_init(struct drm_device *drm, struct device_node *np) { - struct arcpgu_drm_connector *arcpgu_connector; - struct drm_i2c_encoder_driver *driver; - struct drm_encoder_slave *encoder; - struct drm_connector *connector; - struct i2c_client *i2c_slave; - int ret; + struct drm_encoder *encoder; + struct drm_bridge *bridge; + + int ret = 0; encoder = devm_kzalloc(drm->dev, sizeof(*encoder), GFP_KERNEL); if (encoder == NULL) return -ENOMEM; - i2c_slave = of_find_i2c_device_by_node(np); - if (!i2c_slave || !i2c_get_clientdata(i2c_slave)) { - dev_err(drm->dev, "failed to find i2c slave encoder\n"); - return -EPROBE_DEFER; - } - - if (i2c_slave->dev.driver == NULL) { - dev_err(drm->dev, "failed to find i2c slave driver\n"); + /* Locate drm bridge from the hdmi encoder DT node */ + bridge = of_drm_find_bridge(np); + if (!bridge) return -EPROBE_DEFER; - } - driver = - to_drm_i2c_encoder_driver(to_i2c_driver(i2c_slave->dev.driver)); - ret = driver->encoder_init(i2c_slave, drm, encoder); - if (ret) { - dev_err(drm->dev, "failed to initialize i2c encoder slave\n"); - return ret; - } - - encoder->base.possible_crtcs = 1; - encoder->base.possible_clones = 0; - ret = drm_encoder_init(drm, &encoder->base, &arcpgu_drm_encoder_funcs, + encoder->possible_crtcs = 1; + encoder->possible_clones = 0; + ret = drm_encoder_init(drm, encoder, &arcpgu_drm_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); if (ret) return ret; - drm_encoder_helper_add(&encoder->base, - &arcpgu_drm_encoder_helper_funcs); - - arcpgu_connector = devm_kzalloc(drm->dev, sizeof(*arcpgu_connector), - GFP_KERNEL); - if (!arcpgu_connector) { - ret = -ENOMEM; - goto error_encoder_cleanup; - } - - connector = &arcpgu_connector->connector; - drm_connector_helper_add(connector, &arcpgu_drm_connector_helper_funcs); - ret = drm_connector_init(drm, connector, &arcpgu_drm_connector_funcs, - DRM_MODE_CONNECTOR_HDMIA); - if (ret < 0) { - dev_err(drm->dev, "failed to initialize drm connector\n"); - goto error_encoder_cleanup; - } + /* Link drm_bridge to encoder */ + bridge->encoder = encoder; + encoder->bridge = bridge; - ret = drm_mode_connector_attach_encoder(connector, &encoder->base); - if (ret < 0) { - dev_err(drm->dev, "could not attach connector to encoder\n"); - drm_connector_unregister(connector); - goto error_connector_cleanup; - } - - arcpgu_connector->encoder_slave = encoder; - - return 0; - -error_connector_cleanup: - drm_connector_cleanup(connector); + ret = drm_bridge_attach(drm, bridge); + if (ret) + drm_encoder_cleanup(encoder); -error_encoder_cleanup: - drm_encoder_cleanup(&encoder->base); return ret; } -- cgit v1.2.3-59-g8ed1b From 48004881f6935704e5e4ffaf9e0ec921a25db243 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Nov 2016 16:52:04 +0000 Subject: drm/i915: Mark CPU cache as dirty when used for rendering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On LLC, or even snooped, machines rendering via the GPU ends up in the CPU cache. This cacheline dirt also needs to be flushed to main memory when moving to an incoherent domain, such as the display's scanout engine. Mostly, this happens because either the object is marked as dirty from its first use or is avoided by setting the object into the display domain from the start. v2: Treat WT as not requiring a clflush prior to use on the display engine as well. Fixes: 0f71979ab7fb ("drm/i915: Performed deferred clflush inside set-cache-level") References: https://bugs.freedesktop.org/show_bug.cgi?id=95414 Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Ville Syrjälä Cc: # v4.0+ Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20161107165204.7008-1-chris@chris-wilson.co.uk (cherry picked from commit 7aa6ca61ee5546d74b76610894924cdb0d4a1af0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 7adb4c77cc7f..a218c2e395e7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1281,6 +1281,12 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, return ctx; } +static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + return !(obj->cache_level == I915_CACHE_NONE || + obj->cache_level == I915_CACHE_WT); +} + void i915_vma_move_to_active(struct i915_vma *vma, struct drm_i915_gem_request *req, unsigned int flags) @@ -1311,6 +1317,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, /* update for the implicit flush after a batch */ obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + if (!obj->cache_dirty && gpu_write_needs_clflush(obj)) + obj->cache_dirty = true; } if (flags & EXEC_OBJECT_NEEDS_FENCE) -- cgit v1.2.3-59-g8ed1b From 9f1a7ab260300c670608a9db861187069f8b179a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 7 Nov 2016 22:20:55 +0200 Subject: drm/i915: Grab the rotation from the passed plane state for VLV sprites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the passed in plane_state instead of plane->state in vlv_update_plane(). Currently the two are one and the same, but if we start queuing up multiple plane updates they might not be. Looks like this was rebase fail on my part. Cc: Daniel Vetter Fixes: 8d0deca8c6e0 ("drm/i915: Pass 90/270 vs. 0/180 rotation info for intel_gen4_compute_page_offset()") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1478550057-24864-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 11df4d95b3ad9e6a6a6e0907bb200610a4d24887) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sprite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 73a521fdf1bd..dbed12c484c9 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -358,7 +358,7 @@ vlv_update_plane(struct drm_plane *dplane, int plane = intel_plane->plane; u32 sprctl; u32 sprsurf_offset, linear_offset; - unsigned int rotation = dplane->state->rotation; + unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; -- cgit v1.2.3-59-g8ed1b From fc22b787890f9f9067fd130feec42297a4ee62ba Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 21 Oct 2016 16:44:38 +0300 Subject: drm/i915: Refresh that status of MST capable connectors in ->detect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once we've determined that the sink is MST capable we never end up running through the full detect cycle again, despite getting HPDs. Fix tht by ripping out the incorrect piece of code responsible. This got broken when I moved the long HPD handling to the ->detect() hook, but failed to remove the leftover code. Cc: Ander Conselvan de Oliveira Cc: drm-intel-fixes@lists.freedesktop.org Cc: Rui Tiago Matos Tested-by: Rui Tiago Matos Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98323 Cc: Kirill A. Shutemov Tested-by: Kirill A. Shutemov References: https://bugs.freedesktop.org/show_bug.cgi?id=98306 Fixes: 1015811609c0 ("drm/i915: Move long hpd handling into the hotplug work") Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477057478-29328-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 1aab956c7b8872fb6976328316bfad62c6e67cf8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 3581b5a7f716..bf344d08356a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4463,21 +4463,11 @@ static enum drm_connector_status intel_dp_detect(struct drm_connector *connector, bool force) { struct intel_dp *intel_dp = intel_attached_dp(connector); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; enum drm_connector_status status = connector->status; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (intel_dp->is_mst) { - /* MST devices are disconnected from a monitor POV */ - intel_dp_unset_edid(intel_dp); - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; - return connector_status_disconnected; - } - /* If full detect is not performed yet, do a full detect */ if (!intel_dp->detect_done) status = intel_dp_long_pulse(intel_dp->attached_connector); -- cgit v1.2.3-59-g8ed1b From 9a2541910dc7eaaa6859eea8a0ffda673059a623 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 11 Nov 2016 12:33:20 +0100 Subject: ALSA: hda - Fix mic regression by ASRock mobo fixup The commit [1a3f099101b8: ALSA: hda - Fix surround output pins for ASRock B150M mobo] introduced a fixup of pin configs for ASRock mobos to fix the surround outputs. However, this overrides the pin configs of the mic pins as if they are outputs-only, effectively disabling the mic inputs. Of course, it's a regression wrt mic functionality. Actually the pins 0x18 and 0x1a don't need to be changed; we just need to disable the bogus pins 0x14 and 0x15. Then the auto-parser will pick up mic pins as switchable and assign the surround outputs there. This patch removes the incorrect pin overrides of NID 0x18 and 0x1a from the ASRock fixup. Fixes: 1a3f099101b8 ('ALSA: hda - Fix surround output pins for ASRock...') Reported-and-tested-by: Vitor Antunes Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=187431 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2f909dd8b7b8..ea81c08ddc7a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6907,8 +6907,6 @@ static const struct hda_fixup alc662_fixups[] = { .v.pins = (const struct hda_pintbl[]) { { 0x15, 0x40f000f0 }, /* disabled */ { 0x16, 0x40f000f0 }, /* disabled */ - { 0x18, 0x01014011 }, /* LO */ - { 0x1a, 0x01014012 }, /* LO */ { } } }, -- cgit v1.2.3-59-g8ed1b From 8e94a46c1770884166b31adc99eba7da65a446a7 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Wed, 9 Nov 2016 02:25:15 +0100 Subject: drm/amdgpu: Attach exclusive fence to prime exported bo's. (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit External clients which import our bo's wait only for exclusive dmabuf-fences, not on shared ones, ditto for bo's which we import from external providers and write to. Therefore attach exclusive fences on prime shared buffers if our exported buffer gets imported by an external client, or if we import a buffer from an external exporter. See discussion in thread: https://lists.freedesktop.org/archives/dri-devel/2016-October/122370.html Prime export tested on Intel iGPU + AMD Tonga dGPU as DRI3/Present Prime render offload, and with the Tonga standalone as primary gpu. v2: Add a wait for all shared fences before prime export, as suggested by Christian Koenig. v3: - Mark buffer prime_exported in amdgpu_gem_prime_pin, so we only use the exclusive fence when exporting a bo to external clients like a separate iGPU, but not when exporting/importing from/to ourselves as part of regular DRI3 fd passing. - Propagate failure of reservation_object_wait_rcu back to caller. v4: - Switch to a prime_shared_count counter instead of a flag, which gets in/decremented on prime_pin/unpin, so we can switch back to shared fences if all clients detach from our exported bo. - Also switch to exclusive fence for prime imported bo's. v5: - Drop lret, instead use int ret -> long ret, as proposed by Christian. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95472 Tested-by: Mike Lothian (v1) Signed-off-by: Mario Kleiner Reviewed-by: Christian König . Cc: Christian König Cc: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 20 +++++++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 039b57e4644c..496f72b134eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -459,6 +459,7 @@ struct amdgpu_bo { u64 metadata_flags; void *metadata; u32 metadata_size; + unsigned prime_shared_count; /* list of all virtual address to which this bo * is associated to */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 651115dcce12..c02db01f6583 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -132,7 +132,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &entry->robj->tbo; - entry->tv.shared = true; + entry->tv.shared = !entry->robj->prime_shared_count; if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 7700dc22f243..3826d5aea0a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -74,20 +74,36 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, if (ret) return ERR_PTR(ret); + bo->prime_shared_count = 1; return &bo->gem_base; } int amdgpu_gem_prime_pin(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int ret = 0; + long ret = 0; ret = amdgpu_bo_reserve(bo, false); if (unlikely(ret != 0)) return ret; + /* + * Wait for all shared fences to complete before we switch to future + * use of exclusive fence on this prime shared bo. + */ + ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(ret < 0)) { + DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret); + amdgpu_bo_unreserve(bo); + return ret; + } + /* pin buffer into GTT */ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (likely(ret == 0)) + bo->prime_shared_count++; + amdgpu_bo_unreserve(bo); return ret; } @@ -102,6 +118,8 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj) return; amdgpu_bo_unpin(bo); + if (bo->prime_shared_count) + bo->prime_shared_count--; amdgpu_bo_unreserve(bo); } -- cgit v1.2.3-59-g8ed1b From f23ed166f283b1a6f0a1f0b0c889e8df9a10ff85 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 2 Nov 2016 17:57:01 +1100 Subject: powerpc/64s: Fix system reset interrupt winkle wakeups Wakeups from winkle set the low bit of the HSPRG0 register, to distinguish it from other sleep states. This is also the PACA pointer. The system reset exception handler fails to mask this bit away before using this value before using it as the PACA pointer. Fix this by adding a new type of exception prolog macro where we already have the PACA set in r13, and have the system reset vector mask it out. The winkle wakeup handler will store the masked value back into HSPRG0. Fixes: fb479e44a9e2 ("powerpc/64s: relocation, register save fixes for system reset interrupt") Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Nicholas Piggin Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 13 +++++++++++-- arch/powerpc/kernel/exceptions-64s.S | 11 ++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 84d49b197c32..3ce43664eadf 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -158,14 +158,17 @@ BEGIN_FTR_SECTION_NESTED(943) \ std ra,offset(r13); \ END_FTR_SECTION_NESTED(ftr,ftr,943) -#define EXCEPTION_PROLOG_0(area) \ - GET_PACA(r13); \ +#define EXCEPTION_PROLOG_0_PACA(area) \ std r9,area+EX_R9(r13); /* save r9 */ \ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ HMT_MEDIUM; \ std r10,area+EX_R10(r13); /* save r10 - r12 */ \ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) +#define EXCEPTION_PROLOG_0(area) \ + GET_PACA(r13); \ + EXCEPTION_PROLOG_0_PACA(area) + #define __EXCEPTION_PROLOG_1(area, extra, vec) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ @@ -196,6 +199,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_1(area, extra, vec); \ EXCEPTION_PROLOG_PSERIES_1(label, h); +/* Have the PACA in r13 already */ +#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec) \ + EXCEPTION_PROLOG_0_PACA(area); \ + EXCEPTION_PROLOG_1(area, extra, vec); \ + EXCEPTION_PROLOG_PSERIES_1(label, h); + #define __KVMTEST(h, n) \ lbz r10,HSTATE_IN_GUEST(r13); \ cmpwi r10,0; \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 08ba447a4b3d..1ba82ea90230 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -116,7 +116,9 @@ EXC_VIRT_NONE(0x4000, 0x4100) EXC_REAL_BEGIN(system_reset, 0x100, 0x200) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + GET_PACA(r13) + clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */ + EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD, IDLETEST, 0x100) EXC_REAL_END(system_reset, 0x100, 0x200) @@ -124,6 +126,9 @@ EXC_VIRT_NONE(0x4100, 0x4200) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) +BEGIN_FTR_SECTION + GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -169,7 +174,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300) SET_SCRATCH0(r13) /* save r13 */ /* * Running native on arch 2.06 or later, we may wakeup from winkle - * inside machine check. If yes, then last bit of HSPGR0 would be set + * inside machine check. If yes, then last bit of HSPRG0 would be set * to 1. Hence clear it unconditionally. */ GET_PACA(r13) @@ -388,7 +393,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) /* * Go back to winkle. Please note that this thread was woken up in * machine check from winkle and have not restored the per-subcore - * state. Hence before going back to winkle, set last bit of HSPGR0 + * state. Hence before going back to winkle, set last bit of HSPRG0 * to 1. This will make sure that if this thread gets woken up * again at reset vector 0x100 then it will get chance to restore * the subcore state. -- cgit v1.2.3-59-g8ed1b From e6740ae631db02e4f3a6742e2a38ea63718d8d17 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 7 Nov 2016 22:28:21 -0800 Subject: powerpc: Fix exception vector build with 2.23 era binutils The changes to use gas sections for constructing the exception vectors causes a build break when using binutils 2.23: arch/powerpc/kernel/exceptions-64s.S:770: Error: operand out of range (0xffffffffffff8100 is not between 0x0000000000000000 and 0x000000000000ffff) And so on. Reported by Hugh with binutils-2.23.2-8.1.4.ppc64 from openSUSE 13.1 and also Naveen & Denis using 2.23.52.0.1-26.el7 from RHEL 7. Strangely binutils 2.22 (what I test with) is not affected. This is caused by the use of @l in LOAD_HANDLER(). The @l was only recently added in commit a24553dd02dc ("powerpc/pseries: Remove unnecessary syscall trampoline"). Luckily the gas section changes split out the LOAD_SYSCALL_HANDLER() macro, which means we actually *don't* need to use @l in LOAD_HANDLER() any more, only in LOAD_SYSCALL_HANDLER(). So drop the @l from LOAD_HANDLER(). Fixes: 57f266497d81 ("powerpc: Use gas sections for arranging exception vectors") Signed-off-by: Hugh Dickins [mpe: Add gory details to change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 3ce43664eadf..9a3eee661297 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -91,7 +91,7 @@ */ #define LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); /* get high part of &label */ \ - ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; + ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label); #define __LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); \ -- cgit v1.2.3-59-g8ed1b From 9a1f490f358e44a1cf463ba8124ca39fcc042992 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Nov 2016 22:20:46 +1100 Subject: powerpc/oops: Fix missing pr_cont()s in show_stack() Previously we got away with printing the stack trace in multiple pieces and it usually looked right. But since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines"), KERN_CONT is now required when printing continuation lines. Use pr_cont() as appropriate. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ce6dc61b15b2..621d9b23df72 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1900,14 +1900,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((ip == rth) && curr_frame >= 0) { - printk(" (%pS)", + pr_cont(" (%pS)", (void *)current->ret_stack[curr_frame].ret); curr_frame--; } #endif if (firstframe) - printk(" (unreliable)"); - printk("\n"); + pr_cont(" (unreliable)"); + pr_cont("\n"); } firstframe = 0; -- cgit v1.2.3-59-g8ed1b From db5ba5ae6e8d5374429212de8e20933a8a0ce52e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 Nov 2016 22:20:47 +1100 Subject: powerpc/oops: Fix missing pr_cont()s in print_msr_bits() et. al. Since the KERN_CONT changes these are being horribly split across lines, for example: MSR: 8000000000009033 < SF,EE ,ME,IR ,DR,RI ,LE> So fix it by using pr_cont() where appropriate. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 621d9b23df72..38f85d7a1e06 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1282,7 +1282,7 @@ static void print_bits(unsigned long val, struct regbit *bits, const char *sep) for (; bits->bit; ++bits) if (val & bits->bit) { - printk("%s%s", s, bits->name); + pr_cont("%s%s", s, bits->name); s = sep; } } @@ -1305,9 +1305,9 @@ static void print_tm_bits(unsigned long val) * T: Transactional (bit 34) */ if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) { - printk(",TM["); + pr_cont(",TM["); print_bits(val, msr_tm_bits, ""); - printk("]"); + pr_cont("]"); } } #else @@ -1316,10 +1316,10 @@ static void print_tm_bits(unsigned long val) {} static void print_msr_bits(unsigned long val) { - printk("<"); + pr_cont("<"); print_bits(val, msr_bits, ","); print_tm_bits(val); - printk(">"); + pr_cont(">"); } #ifdef CONFIG_PPC64 -- cgit v1.2.3-59-g8ed1b From 7dae865f5878fc0c2edfb3b9165712ef33ce03df Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 3 Nov 2016 20:45:26 +1100 Subject: powerpc/oops: Fix missing pr_cont()s in show_regs() Fix up our oops output by converting continuation lines to use pr_cont(). Some of these are dubious, eg. printing a continuation line which starts with a newline, but seem to work OK for now. This whole function needs a rewrite in the next release. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 38f85d7a1e06..6fe8fa481f8a 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1347,29 +1347,29 @@ void show_regs(struct pt_regs * regs) printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); trap = TRAP(regs); if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) - printk("CFAR: "REG" ", regs->orig_gpr3); + pr_cont("CFAR: "REG" ", regs->orig_gpr3); if (trap == 0x200 || trap == 0x300 || trap == 0x600) #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); + pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); #else - printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); + pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - printk("SOFTE: %ld ", regs->softe); + pr_cont("SOFTE: %ld ", regs->softe); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) - printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); + pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); #endif for (i = 0; i < 32; i++) { if ((i % REGS_PER_LINE) == 0) - printk("\nGPR%02d: ", i); - printk(REG " ", regs->gpr[i]); + pr_cont("\nGPR%02d: ", i); + pr_cont(REG " ", regs->gpr[i]); if (i == LAST_VOLATILE && !FULL_REGS(regs)) break; } - printk("\n"); + pr_cont("\n"); #ifdef CONFIG_KALLSYMS /* * Lookup NIP late so we have the best change of getting the -- cgit v1.2.3-59-g8ed1b From 2ffd04dee0dacff36c03a02434965a96da032bcd Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 4 Nov 2016 17:20:40 +1100 Subject: powerpc/oops: Fix missing pr_cont()s in instruction dump Since the KERN_CONT changes, the current code in show_instructions() prints out a whole bunch of unnecessary newlines. Change occurrences of printk("\n") to pr_cont("\n"). While we're here, change all the other cases of printk(KERN_CONT ...) to pr_cont() as well. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 6fe8fa481f8a..49a680d5ae37 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1215,7 +1215,7 @@ static void show_instructions(struct pt_regs *regs) int instr; if (!(i % 8)) - printk("\n"); + pr_cont("\n"); #if !defined(CONFIG_BOOKE) /* If executing with the IMMU off, adjust pc rather @@ -1227,18 +1227,18 @@ static void show_instructions(struct pt_regs *regs) if (!__kernel_text_address(pc) || probe_kernel_address((unsigned int __user *)pc, instr)) { - printk(KERN_CONT "XXXXXXXX "); + pr_cont("XXXXXXXX "); } else { if (regs->nip == pc) - printk(KERN_CONT "<%08x> ", instr); + pr_cont("<%08x> ", instr); else - printk(KERN_CONT "%08x ", instr); + pr_cont("%08x ", instr); } pc += sizeof(int); } - printk("\n"); + pr_cont("\n"); } struct regbit { -- cgit v1.2.3-59-g8ed1b From 83d2c9a9c17b1e9f23a3a0c24c03cd18e4b02520 Mon Sep 17 00:00:00 2001 From: Sven Ebenfeld Date: Mon, 7 Nov 2016 18:51:34 +0100 Subject: crypto: caam - do not register AES-XTS mode on LP units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using AES-XTS on a Wandboard, we receive a Mode error: caam_jr 2102000.jr1: 20001311: CCB: desc idx 19: AES: Mode error. According to the Security Reference Manual, the Low Power AES units of the i.MX6 do not support the XTS mode. Therefore we must not register XTS implementations in the Crypto API. Signed-off-by: Sven Ebenfeld Reviewed-by: Horia Geantă Cc: # 4.4+ Fixes: c6415a6016bf "crypto: caam - add support for acipher xts(aes)" Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 156aad167cd6..f5a63ba97023 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -4583,6 +4583,15 @@ static int __init caam_algapi_init(void) if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES)) continue; + /* + * Check support for AES modes not available + * on LP devices. + */ + if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP) + if ((alg->class1_alg_type & OP_ALG_AAI_MASK) == + OP_ALG_AAI_XTS) + continue; + t_alg = caam_alg_alloc(alg); if (IS_ERR(t_alg)) { err = PTR_ERR(t_alg); -- cgit v1.2.3-59-g8ed1b From fa14a0acea1ffe67913ba384a2897130a36dfe03 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 1 Nov 2016 18:36:46 +0200 Subject: nvmet-rdma: Fix possible NULL deref when handling rdma cm events When we initiate queue teardown sequence we call rdma_destroy_qp which clears cm_id->qp, afterwards we call rdma_destroy_id, but we might see a rdma_cm event in between with a cleared cm_id->qp so watch out for that and silently ignore the event because this means that the queue teardown sequence is in progress. Signed-off-by: Bart Van Assche Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index f8d23999e0f2..cf60759cc169 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1352,7 +1352,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_TIMEWAIT_EXIT: - nvmet_rdma_queue_disconnect(queue); + /* + * We might end up here when we already freed the qp + * which means queue release sequence is in progress, + * so don't get in the way... + */ + if (queue) + nvmet_rdma_queue_disconnect(queue); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: ret = nvmet_rdma_device_removal(cm_id, queue); -- cgit v1.2.3-59-g8ed1b From 553cd9ef82edd811948782a8f73ae73c4bfeedd3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 2 Nov 2016 08:49:18 -0600 Subject: nvme-rdma: reject non-connect commands before the queue is live If we reconncect we might have command queue up that get resent as soon as the queue is restarted. But until the connect command succeeded we can't send other command. Add a new flag that marks a queue as live when connect finishes, and delay any non-connect command until the queue is live based on it. Signed-off-by: Christoph Hellwig Reported-by: Steve Wise Tested-by: Steve Wise [sagig: fixes admin queue LIVE setting] Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 5a8388177959..438d6948895f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -83,6 +83,7 @@ enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), NVME_RDMA_Q_DELETING = (1 << 2), + NVME_RDMA_Q_LIVE = (1 << 3), }; struct nvme_rdma_queue { @@ -626,6 +627,7 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) ret = nvmf_connect_io_queue(&ctrl->ctrl, i); if (ret) break; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } return ret; @@ -712,6 +714,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) if (ret) goto stop_admin_q; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); if (ret) goto stop_admin_q; @@ -761,8 +765,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) nvme_stop_keep_alive(&ctrl->ctrl); - for (i = 0; i < ctrl->queue_count; i++) + for (i = 0; i < ctrl->queue_count; i++) { clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); + } if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); @@ -1378,6 +1384,24 @@ nvme_rdma_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; } +/* + * We cannot accept any other command until the Connect command has completed. + */ +static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, + struct request *rq) +{ + if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { + struct nvme_command *cmd = (struct nvme_command *)rq->cmd; + + if (rq->cmd_type != REQ_TYPE_DRV_PRIV || + cmd->common.opcode != nvme_fabrics_command || + cmd->fabrics.fctype != nvme_fabrics_type_connect) + return false; + } + + return true; +} + static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -1394,6 +1418,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, WARN_ON_ONCE(rq->tag < 0); + if (!nvme_rdma_queue_is_ready(queue, rq)) + return BLK_MQ_RQ_QUEUE_BUSY; + dev = queue->device->dev; ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -1544,6 +1571,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) if (error) goto out_cleanup_queue; + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); if (error) { dev_err(ctrl->ctrl.device, -- cgit v1.2.3-59-g8ed1b From 8242ddac1bfcf6eb8873b4d0a4e7a172c2b5b625 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:03:30 +0200 Subject: nvmet: Don't queue fatal error work if csts.cfs is set In the transport, in case of an interal queue error like error completion in rdma we trigger a fatal error. However, multiple queues in the same controller can serr error completions and we don't want to trigger fatal error work more than once. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b4cacb6f0258..a21437a33adb 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -838,9 +838,13 @@ static void nvmet_fatal_error_handler(struct work_struct *work) void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) { - ctrl->csts |= NVME_CSTS_CFS; - INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); - schedule_work(&ctrl->fatal_err_work); + mutex_lock(&ctrl->lock); + if (!(ctrl->csts & NVME_CSTS_CFS)) { + ctrl->csts |= NVME_CSTS_CFS; + INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); + schedule_work(&ctrl->fatal_err_work); + } + mutex_unlock(&ctrl->lock); } EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error); -- cgit v1.2.3-59-g8ed1b From 766dbb179d41d6337fed2b3ca00caa5845d298ce Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:09:49 +0200 Subject: nvmet-rdma: don't forget to delete a queue from the list of connection failed In case we accepted a queue connection and it failed, we might not remove the queue from the list until we unload and clean it up. We should delete it from the queue list on the relevant handler. Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index cf60759cc169..8c06675c2305 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1066,6 +1066,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, spin_lock_init(&queue->rsp_wr_wait_lock); INIT_LIST_HEAD(&queue->free_rsps); spin_lock_init(&queue->rsps_lock); + INIT_LIST_HEAD(&queue->queue_list); queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL); if (queue->idx < 0) { @@ -1269,7 +1270,12 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, { WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING); - pr_err("failed to connect queue\n"); + mutex_lock(&nvmet_rdma_queue_mutex); + if (!list_empty(&queue->queue_list)) + list_del_init(&queue->queue_list); + mutex_unlock(&nvmet_rdma_queue_mutex); + + pr_err("failed to connect queue %d\n", queue->idx); schedule_work(&queue->release_work); } -- cgit v1.2.3-59-g8ed1b From c8dbc37cd81d4705fce51123f5d81ea3267a5b88 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 8 Nov 2016 09:16:02 -0800 Subject: nvme-rdma: stop and free io queues on connect failure While testing nvme-rdma with the spdk nvmf target over iw_cxgb4, I configured the target (mistakenly) to generate an error creating the NVMF IO queues. This resulted a "Invalid SQE Parameter" error sent back to the host on the first IO queue connect: [ 9610.928182] nvme nvme1: queue_size 128 > ctrl maxcmd 120, clamping down [ 9610.938745] nvme nvme1: creating 32 I/O queues. So nvmf_connect_io_queue() returns an error to nvmf_connect_io_queue() / nvmf_connect_io_queues(), and that is returned to nvme_rdma_create_io_queues(). In the error path, nvmf_rdma_create_io_queues() frees the queue tagset memory _before_ stopping and freeing the IB queues, which causes yet another touch-after-free crash due to SQ CQEs being flushed after the ib_cqe structs pointed-to by the flushed WRs have been freed (since they are part of the nvme_rdma_request struct). The fix is to stop and free the queues in nvmf_connect_io_queues() if there is an error connecting any of the queues. Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/host/rdma.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 438d6948895f..3d25add36d91 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -625,11 +625,18 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) for (i = 1; i < ctrl->queue_count; i++) { ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) - break; + if (ret) { + dev_info(ctrl->ctrl.device, + "failed to connect i/o queue: %d\n", ret); + goto out_free_queues; + } set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } + return 0; + +out_free_queues: + nvme_rdma_free_io_queues(ctrl); return ret; } -- cgit v1.2.3-59-g8ed1b From 14c862dbb0a0e0a9baec20480d441e32cb54b2b9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 6 Nov 2016 11:03:59 +0200 Subject: nvmet-rdma: drain the queue-pair just before freeing it draining the qp right after disconnect might not suffice because the nvmet sq is not fully drained (in nvmet_sq_destroy) and we might see completions after the drain. Instead, drain right before the qp destroy which comes after the sq destruction and we can be sure that no posts come after the drain. Tested-by: Steve Wise Signed-off-by: Sagi Grimberg --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 8c06675c2305..005ef5d17a19 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -951,6 +951,7 @@ err_destroy_cq: static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) { + ib_drain_qp(queue->cm_id->qp); rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->cq); } @@ -1245,7 +1246,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - ib_drain_qp(queue->cm_id->qp); schedule_work(&queue->release_work); } } -- cgit v1.2.3-59-g8ed1b From f732c5b7c734cfc2c563c918fe2842175c7eb073 Mon Sep 17 00:00:00 2001 From: Axl-zhang Date: Wed, 2 Nov 2016 13:31:12 +0800 Subject: dmaengine: sun6i: fix the uninitialized value for v_lli dma_pool_alloc does not initialize the value of the newly allocated block for the v_lli, and the uninitilize value make the tests failed which is on pine64 with dmatest. we can fix it just change the "|=" to "=" for the v_lli->cfg. Signed-off-by: Hao Zhang Acked-by: Maxime Ripard Signed-off-by: Vinod Koul --- drivers/dma/sun6i-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 83461994e418..a2358780ab2c 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -578,7 +578,7 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy( burst = convert_burst(8); width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES); - v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | + v_lli->cfg = DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) | DMA_CHAN_CFG_DST_LINEAR_MODE | DMA_CHAN_CFG_SRC_LINEAR_MODE | -- cgit v1.2.3-59-g8ed1b From 12f5908080bdccca2cb2f7ad850cb360c92f481a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Nov 2016 09:47:58 -0700 Subject: dmaengine: cppi41: Fix list not empty warning on module removal If musb controller is configured with USB peripherals and we have enumerated with a USB host, we can get warnings on removal of the modules: WARNING: CPU: 0 PID: 1269 at drivers/dma/cppi41.c:391 cppi41_dma_free_chan_resources Fix the issue by adding the missing pm_runtime_get to cppi41_dma_free_chan_resources to make sure the pending work list is cleared on removal. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index bac5f023013b..6ed99d926358 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -1072,7 +1072,12 @@ err_get_sync: static int cppi41_dma_remove(struct platform_device *pdev) { struct cppi41_dd *cdd = platform_get_drvdata(pdev); + int error; + error = pm_runtime_get_sync(&pdev->dev); + if (error < 0) + dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n", + __func__, error); of_dma_controller_free(pdev->dev.of_node); dma_async_device_unregister(&cdd->ddev); -- cgit v1.2.3-59-g8ed1b From 098de42ad6708866501a00155ba85350bc0b29e5 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 9 Nov 2016 09:47:59 -0700 Subject: dmaengine: cppi41: Fix unpaired pm runtime when only a USB hub is connected On am335x with musb host we can end up with unpaired pm runtime calls if a hub with no devices is connected and disconnected. This is because of the conditional pm runtime calls which are always a bad idea. Let's fix the issue by making them unconditional and paired in each function. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 6ed99d926358..3d2d8b5a6c91 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -318,6 +318,11 @@ static irqreturn_t cppi41_irq(int irq, void *data) while (val) { u32 desc, len; + status = pm_runtime_get(cdd->ddev.dev); + if (status < 0) + dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", + __func__, status); + q_num = __fls(val); val &= ~(1 << q_num); q_num += 32 * i; @@ -338,7 +343,6 @@ static irqreturn_t cppi41_irq(int irq, void *data) dma_cookie_complete(&c->txd); dmaengine_desc_get_callback_invoke(&c->txd, NULL); - /* Paired with cppi41_dma_issue_pending */ pm_runtime_mark_last_busy(cdd->ddev.dev); pm_runtime_put_autosuspend(cdd->ddev.dev); } @@ -460,7 +464,6 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) struct cppi41_dd *cdd = c->cdd; int error; - /* PM runtime paired with dmaengine_desc_get_callback_invoke */ error = pm_runtime_get(cdd->ddev.dev); if ((error != -EINPROGRESS) && error < 0) { dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", @@ -473,6 +476,9 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) push_desc_queue(c); else pending_desc(c); + + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); } static u32 get_host_pd0(u32 length) -- cgit v1.2.3-59-g8ed1b From 740b4be3f742100ea66f0f9ee9715b10ee304a90 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 11 Nov 2016 11:28:52 -0800 Subject: dmaengine: cpp41: Fix handling of error path If we return early on pm_runtime_get() error, we need to also call pm_runtime_put_noidle() as pointed out in a musb related thread by Johan Hovold . This is to keep the PM runtime use counts happy. Fixes: fdea2d09b997 ("dmaengine: cppi41: Add basic PM runtime support") Cc: Johan Hovold Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 3d2d8b5a6c91..4b52126c13cf 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -366,8 +366,11 @@ static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) int error; error = pm_runtime_get_sync(cdd->ddev.dev); - if (error < 0) + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + return error; + } dma_cookie_init(chan); dma_async_tx_descriptor_init(&c->txd, chan); @@ -389,8 +392,11 @@ static void cppi41_dma_free_chan_resources(struct dma_chan *chan) int error; error = pm_runtime_get_sync(cdd->ddev.dev); - if (error < 0) + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + return; + } WARN_ON(!list_empty(&cdd->pending)); @@ -466,6 +472,7 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan) error = pm_runtime_get(cdd->ddev.dev); if ((error != -EINPROGRESS) && error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", error); -- cgit v1.2.3-59-g8ed1b From ea08e39230e898844d9de5b60cdbb30067cebfe7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 11 Nov 2016 13:16:22 -0500 Subject: sunrpc: svc_age_temp_xprts_now should not call setsockopt non-tcp transports This fixes the following panic that can occur with NFSoRDMA. general protection fault: 0000 [#1] SMP Modules linked in: rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm mlx5_ib ib_core intel_powerclamp coretemp kvm_intel kvm sg ioatdma ipmi_devintf ipmi_ssif dcdbas iTCO_wdt iTCO_vendor_support pcspkr irqbypass sb_edac shpchp dca crc32_pclmul ghash_clmulni_intel edac_core lpc_ich aesni_intel lrw gf128mul glue_helper ablk_helper mei_me mei ipmi_si cryptd wmi ipmi_msghandler acpi_pad acpi_power_meter nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod crc_t10dif crct10dif_generic mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt ahci fb_sys_fops ttm libahci mlx5_core tg3 crct10dif_pclmul drm crct10dif_common ptp i2c_core libata crc32c_intel pps_core fjes dm_mirror dm_region_hash dm_log dm_mod CPU: 1 PID: 120 Comm: kworker/1:1 Not tainted 3.10.0-514.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge R320/0KM5PX, BIOS 2.4.2 01/29/2015 Workqueue: events check_lifetime task: ffff88031f506dd0 ti: ffff88031f584000 task.ti: ffff88031f584000 RIP: 0010:[] [] _raw_spin_lock_bh+0x17/0x50 RSP: 0018:ffff88031f587ba8 EFLAGS: 00010206 RAX: 0000000000020000 RBX: 20041fac02080072 RCX: ffff88031f587fd8 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 20041fac02080072 RBP: ffff88031f587bb0 R08: 0000000000000008 R09: ffffffff8155be77 R10: ffff880322a59b00 R11: ffffea000bf39f00 R12: 20041fac02080072 R13: 000000000000000d R14: ffff8800c4fbd800 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff880322a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3c52d4547e CR3: 00000000019ba000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: 20041fac02080002 ffff88031f587bd0 ffffffff81557830 20041fac02080002 ffff88031f587c78 ffff88031f587c40 ffffffff8155ae08 000000010157df32 0000000800000001 ffff88031f587c20 ffffffff81096acb ffffffff81aa37d0 Call Trace: [] lock_sock_nested+0x20/0x50 [] sock_setsockopt+0x78/0x940 [] ? lock_timer_base.isra.33+0x2b/0x50 [] kernel_setsockopt+0x4d/0x50 [] svc_age_temp_xprts_now+0x174/0x1e0 [sunrpc] [] nfsd_inetaddr_event+0x9d/0xd0 [nfsd] [] notifier_call_chain+0x4c/0x70 [] __blocking_notifier_call_chain+0x4d/0x70 [] blocking_notifier_call_chain+0x16/0x20 [] __inet_del_ifa+0x168/0x2d0 [] check_lifetime+0x25f/0x270 [] process_one_work+0x17b/0x470 [] worker_thread+0x126/0x410 [] ? rescuer_thread+0x460/0x460 [] kthread+0xcf/0xe0 [] ? kthread_create_on_node+0x140/0x140 [] ret_from_fork+0x58/0x90 [] ? kthread_create_on_node+0x140/0x140 Code: ca 75 f1 5d c3 0f 1f 80 00 00 00 00 eb d9 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb e8 7e 04 a0 ff b8 00 00 02 00 0f c1 03 89 c2 c1 ea 10 66 39 c2 75 03 5b 5d c3 83 e2 fe 0f RIP [] _raw_spin_lock_bh+0x17/0x50 RSP Signed-off-by: Scott Mayhew Fixes: c3d4879e ("sunrpc: Add a function to close temporary transports immediately") Reviewed-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 11 +---------- net/sunrpc/svcsock.c | 21 +++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 ++++++ 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ab02a457da1f..e5d193440374 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_kill_temp_xprt)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c3f652395a80..3bc1d61694cb 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1002,14 +1002,8 @@ static void svc_age_temp_xprts(unsigned long closure) void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) { struct svc_xprt *xprt; - struct svc_sock *svsk; - struct socket *sock; struct list_head *le, *next; LIST_HEAD(to_be_closed); - struct linger no_linger = { - .l_onoff = 1, - .l_linger = 0, - }; spin_lock_bh(&serv->sv_lock); list_for_each_safe(le, next, &serv->sv_tempsocks) { @@ -1027,10 +1021,7 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr) list_del_init(le); xprt = list_entry(le, struct svc_xprt, xpt_list); dprintk("svc_age_temp_xprts_now: closing %p\n", xprt); - svsk = container_of(xprt, struct svc_sock, sk_xprt); - sock = svsk->sk_sock; - kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, - (char *)&no_linger, sizeof(no_linger)); + xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_close_xprt(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 57625f64efd5..a4bc98265d88 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -438,6 +438,21 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); } +static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) +{ + struct svc_sock *svsk; + struct socket *sock; + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + svsk = container_of(xprt, struct svc_sock, sk_xprt); + sock = svsk->sk_sock; + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -648,6 +663,10 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) return NULL; } +static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + static struct svc_xprt *svc_udp_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -667,6 +686,7 @@ static struct svc_xprt_ops svc_udp_ops = { .xpo_has_wspace = svc_udp_has_wspace, .xpo_accept = svc_udp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt, }; static struct svc_xprt_class svc_udp_class = { @@ -1242,6 +1262,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = svc_sock_secure_port, + .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, }; static struct svc_xprt_class svc_tcp_class = { diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 6864fb967038..1334de2715c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -67,6 +67,7 @@ static void svc_rdma_detach(struct svc_xprt *xprt); static void svc_rdma_free(struct svc_xprt *xprt); static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); +static void svc_rdma_kill_temp_xprt(struct svc_xprt *); static struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, @@ -79,6 +80,7 @@ static struct svc_xprt_ops svc_rdma_ops = { .xpo_has_wspace = svc_rdma_has_wspace, .xpo_accept = svc_rdma_accept, .xpo_secure_port = svc_rdma_secure_port, + .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt, }; struct svc_xprt_class svc_rdma_class = { @@ -1317,6 +1319,10 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp) return 1; } +static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) +{ +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr, *n_wr; -- cgit v1.2.3-59-g8ed1b From 93d710a65ef02fb7fd48ae207e78f460bd7a6089 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2016 15:34:17 +0100 Subject: i2c: mux: fix up dependencies We get the following build error from UM Linux after adding an entry to drivers/iio/gyro/Kconfig that issues "select I2C_MUX": ERROR: "devm_ioremap_resource" [drivers/i2c/muxes/i2c-mux-reg.ko] undefined! ERROR: "of_address_to_resource" [drivers/i2c/muxes/i2c-mux-reg.ko] undefined! It appears that the I2C mux core code depends on HAS_IOMEM for historical reasons, while CONFIG_I2C_MUX_REG does *not* have a direct dependency on HAS_IOMEM. This creates a situation where a allyesconfig or allmodconfig for UM Linux will select I2C_MUX, and will implicitly enable I2C_MUX_REG as well, and the compilation will fail for the register driver. Fix this up by making I2C_MUX_REG depend on HAS_IOMEM and removing the dependency from I2C_MUX. Reported-by: kbuild test robot Reported-by: Jonathan Cameron Signed-off-by: Linus Walleij Acked-by: Jonathan Cameron Acked-by: Peter Rosin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/Kconfig | 1 - drivers/i2c/muxes/Kconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig index d223650a97e4..11edabf425ae 100644 --- a/drivers/i2c/Kconfig +++ b/drivers/i2c/Kconfig @@ -59,7 +59,6 @@ config I2C_CHARDEV config I2C_MUX tristate "I2C bus multiplexing support" - depends on HAS_IOMEM help Say Y here if you want the I2C core to support the ability to handle multiplexed I2C bus topologies, by presenting each diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index e280c8ecc0b5..96de9ce5669b 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -63,6 +63,7 @@ config I2C_MUX_PINCTRL config I2C_MUX_REG tristate "Register-based I2C multiplexer" + depends on HAS_IOMEM help If you say yes to this option, support will be included for a register based I2C multiplexer. This driver provides access to -- cgit v1.2.3-59-g8ed1b From 87a349f9cc0908bc0cfac0c9ece3179f650ae95a Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Fri, 11 Nov 2016 16:41:00 -0800 Subject: sparc64: fix compile warning section mismatch in find_node() A compile warning is introduced by a commit to fix the find_node(). This patch fix the compile warning by moving find_node() into __init section. Because find_node() is only used by memblock_nid_range() which is only used by a __init add_node_ranges(). find_node() and memblock_nid_range() should also be inside __init section. Signed-off-by: Thomas Tai Signed-off-by: David S. Miller --- arch/sparc/mm/init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 068eb3dcbcb5..37aa537b3ad8 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -805,7 +805,7 @@ static int num_mblocks; static int find_numa_node_for_addr(unsigned long pa, struct node_mem_mask *pnode_mask); -static unsigned long ra_to_pa(unsigned long addr) +static unsigned long __init ra_to_pa(unsigned long addr) { int i; @@ -821,7 +821,7 @@ static unsigned long ra_to_pa(unsigned long addr) return addr; } -static int find_node(unsigned long addr) +static int __init find_node(unsigned long addr) { static bool search_mdesc = true; static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL }; @@ -858,7 +858,7 @@ static int find_node(unsigned long addr) return last_index; } -static u64 memblock_nid_range(u64 start, u64 end, int *nid) +static u64 __init memblock_nid_range(u64 start, u64 end, int *nid) { *nid = find_node(start); start += PAGE_SIZE; -- cgit v1.2.3-59-g8ed1b From e2174b0c24caca170ca61eda2ae49c9561ff8896 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Nov 2016 20:56:17 +0100 Subject: Revert "ACPICA: FADT support cleanup" Pavel Machek reports that commit 6ea8c546f365 (ACPICA: FADT support cleanup) breaks thermal management on his Thinkpad X60 and T40p, so revert it. Link: https://bugzilla.kernel.org/show_bug.cgi?id=187311 Fixes: 6ea8c546f365 (ACPICA: FADT support cleanup) Reported-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbfadt.c | 10 ++- include/acpi/actbl.h | 164 ++++++++++++++++++------------------------- 2 files changed, 74 insertions(+), 100 deletions(-) diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c index 046c4d0394ee..5fb838e592dc 100644 --- a/drivers/acpi/acpica/tbfadt.c +++ b/drivers/acpi/acpica/tbfadt.c @@ -480,19 +480,17 @@ static void acpi_tb_convert_fadt(void) u32 i; /* - * For ACPI 1.0 FADTs (revision 1), ensure that reserved fields which + * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which * should be zero are indeed zero. This will workaround BIOSs that * inadvertently place values in these fields. * * The ACPI 1.0 reserved fields that will be zeroed are the bytes located * at offset 45, 55, 95, and the word located at offset 109, 110. * - * Note: The FADT revision value is unreliable because of BIOS errors. - * The table length is instead used as the final word on the version. - * - * Note: FADT revision 3 is the ACPI 2.0 version of the FADT. + * Note: The FADT revision value is unreliable. Only the length can be + * trusted. */ - if (acpi_gbl_FADT.header.length <= ACPI_FADT_V3_SIZE) { + if (acpi_gbl_FADT.header.length <= ACPI_FADT_V2_SIZE) { acpi_gbl_FADT.preferred_profile = 0; acpi_gbl_FADT.pstate_control = 0; acpi_gbl_FADT.cst_control = 0; diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 1b949e08015c..c19700e2a2fe 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -230,72 +230,62 @@ struct acpi_table_facs { /* Fields common to all versions of the FADT */ struct acpi_table_fadt { - struct acpi_table_header header; /* [V1] Common ACPI table header */ - u32 facs; /* [V1] 32-bit physical address of FACS */ - u32 dsdt; /* [V1] 32-bit physical address of DSDT */ - u8 model; /* [V1] System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ - u8 preferred_profile; /* [V1] Conveys preferred power management profile to OSPM. */ - u16 sci_interrupt; /* [V1] System vector of SCI interrupt */ - u32 smi_command; /* [V1] 32-bit Port address of SMI command port */ - u8 acpi_enable; /* [V1] Value to write to SMI_CMD to enable ACPI */ - u8 acpi_disable; /* [V1] Value to write to SMI_CMD to disable ACPI */ - u8 s4_bios_request; /* [V1] Value to write to SMI_CMD to enter S4BIOS state */ - u8 pstate_control; /* [V1] Processor performance state control */ - u32 pm1a_event_block; /* [V1] 32-bit port address of Power Mgt 1a Event Reg Blk */ - u32 pm1b_event_block; /* [V1] 32-bit port address of Power Mgt 1b Event Reg Blk */ - u32 pm1a_control_block; /* [V1] 32-bit port address of Power Mgt 1a Control Reg Blk */ - u32 pm1b_control_block; /* [V1] 32-bit port address of Power Mgt 1b Control Reg Blk */ - u32 pm2_control_block; /* [V1] 32-bit port address of Power Mgt 2 Control Reg Blk */ - u32 pm_timer_block; /* [V1] 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ - u32 gpe0_block; /* [V1] 32-bit port address of General Purpose Event 0 Reg Blk */ - u32 gpe1_block; /* [V1] 32-bit port address of General Purpose Event 1 Reg Blk */ - u8 pm1_event_length; /* [V1] Byte Length of ports at pm1x_event_block */ - u8 pm1_control_length; /* [V1] Byte Length of ports at pm1x_control_block */ - u8 pm2_control_length; /* [V1] Byte Length of ports at pm2_control_block */ - u8 pm_timer_length; /* [V1] Byte Length of ports at pm_timer_block */ - u8 gpe0_block_length; /* [V1] Byte Length of ports at gpe0_block */ - u8 gpe1_block_length; /* [V1] Byte Length of ports at gpe1_block */ - u8 gpe1_base; /* [V1] Offset in GPE number space where GPE1 events start */ - u8 cst_control; /* [V1] Support for the _CST object and C-States change notification */ - u16 c2_latency; /* [V1] Worst case HW latency to enter/exit C2 state */ - u16 c3_latency; /* [V1] Worst case HW latency to enter/exit C3 state */ - u16 flush_size; /* [V1] Processor memory cache line width, in bytes */ - u16 flush_stride; /* [V1] Number of flush strides that need to be read */ - u8 duty_offset; /* [V1] Processor duty cycle index in processor P_CNT reg */ - u8 duty_width; /* [V1] Processor duty cycle value bit width in P_CNT register */ - u8 day_alarm; /* [V1] Index to day-of-month alarm in RTC CMOS RAM */ - u8 month_alarm; /* [V1] Index to month-of-year alarm in RTC CMOS RAM */ - u8 century; /* [V1] Index to century in RTC CMOS RAM */ - u16 boot_flags; /* [V3] IA-PC Boot Architecture Flags (see below for individual flags) */ - u8 reserved; /* [V1] Reserved, must be zero */ - u32 flags; /* [V1] Miscellaneous flag bits (see below for individual flags) */ - /* End of Version 1 FADT fields (ACPI 1.0) */ - - struct acpi_generic_address reset_register; /* [V3] 64-bit address of the Reset register */ - u8 reset_value; /* [V3] Value to write to the reset_register port to reset the system */ - u16 arm_boot_flags; /* [V5] ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ - u8 minor_revision; /* [V5] FADT Minor Revision (ACPI 5.1) */ - u64 Xfacs; /* [V3] 64-bit physical address of FACS */ - u64 Xdsdt; /* [V3] 64-bit physical address of DSDT */ - struct acpi_generic_address xpm1a_event_block; /* [V3] 64-bit Extended Power Mgt 1a Event Reg Blk address */ - struct acpi_generic_address xpm1b_event_block; /* [V3] 64-bit Extended Power Mgt 1b Event Reg Blk address */ - struct acpi_generic_address xpm1a_control_block; /* [V3] 64-bit Extended Power Mgt 1a Control Reg Blk address */ - struct acpi_generic_address xpm1b_control_block; /* [V3] 64-bit Extended Power Mgt 1b Control Reg Blk address */ - struct acpi_generic_address xpm2_control_block; /* [V3] 64-bit Extended Power Mgt 2 Control Reg Blk address */ - struct acpi_generic_address xpm_timer_block; /* [V3] 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ - struct acpi_generic_address xgpe0_block; /* [V3] 64-bit Extended General Purpose Event 0 Reg Blk address */ - struct acpi_generic_address xgpe1_block; /* [V3] 64-bit Extended General Purpose Event 1 Reg Blk address */ - /* End of Version 3 FADT fields (ACPI 2.0) */ - - struct acpi_generic_address sleep_control; /* [V4] 64-bit Sleep Control register (ACPI 5.0) */ - /* End of Version 4 FADT fields (ACPI 3.0 and ACPI 4.0) (Field was originally reserved in ACPI 3.0) */ - - struct acpi_generic_address sleep_status; /* [V5] 64-bit Sleep Status register (ACPI 5.0) */ - /* End of Version 5 FADT fields (ACPI 5.0) */ - - u64 hypervisor_id; /* [V6] Hypervisor Vendor ID (ACPI 6.0) */ - /* End of Version 6 FADT fields (ACPI 6.0) */ - + struct acpi_table_header header; /* Common ACPI table header */ + u32 facs; /* 32-bit physical address of FACS */ + u32 dsdt; /* 32-bit physical address of DSDT */ + u8 model; /* System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */ + u8 preferred_profile; /* Conveys preferred power management profile to OSPM. */ + u16 sci_interrupt; /* System vector of SCI interrupt */ + u32 smi_command; /* 32-bit Port address of SMI command port */ + u8 acpi_enable; /* Value to write to SMI_CMD to enable ACPI */ + u8 acpi_disable; /* Value to write to SMI_CMD to disable ACPI */ + u8 s4_bios_request; /* Value to write to SMI_CMD to enter S4BIOS state */ + u8 pstate_control; /* Processor performance state control */ + u32 pm1a_event_block; /* 32-bit port address of Power Mgt 1a Event Reg Blk */ + u32 pm1b_event_block; /* 32-bit port address of Power Mgt 1b Event Reg Blk */ + u32 pm1a_control_block; /* 32-bit port address of Power Mgt 1a Control Reg Blk */ + u32 pm1b_control_block; /* 32-bit port address of Power Mgt 1b Control Reg Blk */ + u32 pm2_control_block; /* 32-bit port address of Power Mgt 2 Control Reg Blk */ + u32 pm_timer_block; /* 32-bit port address of Power Mgt Timer Ctrl Reg Blk */ + u32 gpe0_block; /* 32-bit port address of General Purpose Event 0 Reg Blk */ + u32 gpe1_block; /* 32-bit port address of General Purpose Event 1 Reg Blk */ + u8 pm1_event_length; /* Byte Length of ports at pm1x_event_block */ + u8 pm1_control_length; /* Byte Length of ports at pm1x_control_block */ + u8 pm2_control_length; /* Byte Length of ports at pm2_control_block */ + u8 pm_timer_length; /* Byte Length of ports at pm_timer_block */ + u8 gpe0_block_length; /* Byte Length of ports at gpe0_block */ + u8 gpe1_block_length; /* Byte Length of ports at gpe1_block */ + u8 gpe1_base; /* Offset in GPE number space where GPE1 events start */ + u8 cst_control; /* Support for the _CST object and C-States change notification */ + u16 c2_latency; /* Worst case HW latency to enter/exit C2 state */ + u16 c3_latency; /* Worst case HW latency to enter/exit C3 state */ + u16 flush_size; /* Processor memory cache line width, in bytes */ + u16 flush_stride; /* Number of flush strides that need to be read */ + u8 duty_offset; /* Processor duty cycle index in processor P_CNT reg */ + u8 duty_width; /* Processor duty cycle value bit width in P_CNT register */ + u8 day_alarm; /* Index to day-of-month alarm in RTC CMOS RAM */ + u8 month_alarm; /* Index to month-of-year alarm in RTC CMOS RAM */ + u8 century; /* Index to century in RTC CMOS RAM */ + u16 boot_flags; /* IA-PC Boot Architecture Flags (see below for individual flags) */ + u8 reserved; /* Reserved, must be zero */ + u32 flags; /* Miscellaneous flag bits (see below for individual flags) */ + struct acpi_generic_address reset_register; /* 64-bit address of the Reset register */ + u8 reset_value; /* Value to write to the reset_register port to reset the system */ + u16 arm_boot_flags; /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */ + u8 minor_revision; /* FADT Minor Revision (ACPI 5.1) */ + u64 Xfacs; /* 64-bit physical address of FACS */ + u64 Xdsdt; /* 64-bit physical address of DSDT */ + struct acpi_generic_address xpm1a_event_block; /* 64-bit Extended Power Mgt 1a Event Reg Blk address */ + struct acpi_generic_address xpm1b_event_block; /* 64-bit Extended Power Mgt 1b Event Reg Blk address */ + struct acpi_generic_address xpm1a_control_block; /* 64-bit Extended Power Mgt 1a Control Reg Blk address */ + struct acpi_generic_address xpm1b_control_block; /* 64-bit Extended Power Mgt 1b Control Reg Blk address */ + struct acpi_generic_address xpm2_control_block; /* 64-bit Extended Power Mgt 2 Control Reg Blk address */ + struct acpi_generic_address xpm_timer_block; /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */ + struct acpi_generic_address xgpe0_block; /* 64-bit Extended General Purpose Event 0 Reg Blk address */ + struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ + struct acpi_generic_address sleep_control; /* 64-bit Sleep Control register (ACPI 5.0) */ + struct acpi_generic_address sleep_status; /* 64-bit Sleep Status register (ACPI 5.0) */ + u64 hypervisor_id; /* Hypervisor Vendor ID (ACPI 6.0) */ }; /* Masks for FADT IA-PC Boot Architecture Flags (boot_flags) [Vx]=Introduced in this FADT revision */ @@ -311,8 +301,8 @@ struct acpi_table_fadt { /* Masks for FADT ARM Boot Architecture Flags (arm_boot_flags) ACPI 5.1 */ -#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5] PSCI 0.2+ is implemented */ -#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5] HVC must be used instead of SMC as the PSCI conduit */ +#define ACPI_FADT_PSCI_COMPLIANT (1) /* 00: [V5+] PSCI 0.2+ is implemented */ +#define ACPI_FADT_PSCI_USE_HVC (1<<1) /* 01: [V5+] HVC must be used instead of SMC as the PSCI conduit */ /* Masks for FADT flags */ @@ -409,34 +399,20 @@ struct acpi_table_desc { * match the expected length. In other words, the length of the * FADT is the bottom line as to what the version really is. * - * NOTE: There is no officialy released V2 of the FADT. This - * version was used only for prototyping and testing during the - * 32-bit to 64-bit transition. V3 was the first official 64-bit - * version of the FADT. - * - * Update this list of defines when a new version of the FADT is - * added to the ACPI specification. Note that the FADT version is - * only incremented when new fields are appended to the existing - * version. Therefore, the FADT version is competely independent - * from the version of the ACPI specification where it is - * defined. - * - * For reference, the various FADT lengths are as follows: - * FADT V1 size: 0x074 ACPI 1.0 - * FADT V3 size: 0x0F4 ACPI 2.0 - * FADT V4 size: 0x100 ACPI 3.0 and ACPI 4.0 - * FADT V5 size: 0x10C ACPI 5.0 - * FADT V6 size: 0x114 ACPI 6.0 + * For reference, the values below are as follows: + * FADT V1 size: 0x074 + * FADT V2 size: 0x084 + * FADT V3 size: 0x0F4 + * FADT V4 size: 0x0F4 + * FADT V5 size: 0x10C + * FADT V6 size: 0x114 */ -#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) /* ACPI 1.0 */ -#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) /* ACPI 2.0 */ -#define ACPI_FADT_V4_SIZE (u32) (ACPI_FADT_OFFSET (sleep_status)) /* ACPI 3.0 and ACPI 4.0 */ -#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) /* ACPI 5.0 */ -#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) /* ACPI 6.0 */ - -/* Update these when new FADT versions are added */ +#define ACPI_FADT_V1_SIZE (u32) (ACPI_FADT_OFFSET (flags) + 4) +#define ACPI_FADT_V2_SIZE (u32) (ACPI_FADT_OFFSET (minor_revision) + 1) +#define ACPI_FADT_V3_SIZE (u32) (ACPI_FADT_OFFSET (sleep_control)) +#define ACPI_FADT_V5_SIZE (u32) (ACPI_FADT_OFFSET (hypervisor_id)) +#define ACPI_FADT_V6_SIZE (u32) (sizeof (struct acpi_table_fadt)) -#define ACPI_FADT_MAX_VERSION 6 #define ACPI_FADT_CONFORMANCE "ACPI 6.1 (FADT version 6)" #endif /* __ACTBL_H__ */ -- cgit v1.2.3-59-g8ed1b From 709fb1f961ea5c287107c3f903e81c9529224c8b Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 14 Nov 2016 12:31:49 -0800 Subject: xtensa: wire up new pkey_{mprotect,alloc,free} syscalls Signed-off-by: Max Filippov --- arch/xtensa/include/uapi/asm/unistd.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index de9b14b2d348..cd400af4a6b2 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -767,7 +767,14 @@ __SYSCALL(346, sys_preadv2, 6) #define __NR_pwritev2 347 __SYSCALL(347, sys_pwritev2, 6) -#define __NR_syscall_count 348 +#define __NR_pkey_mprotect 348 +__SYSCALL(348, sys_pkey_mprotect, 4) +#define __NR_pkey_alloc 349 +__SYSCALL(349, sys_pkey_alloc, 2) +#define __NR_pkey_free 350 +__SYSCALL(350, sys_pkey_free, 1) + +#define __NR_syscall_count 351 /* * sysxtensa syscall handler -- cgit v1.2.3-59-g8ed1b From 1bc2f5fac34535aeb3878ce32a762a221be7a851 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Mon, 14 Nov 2016 12:55:15 +0100 Subject: ARM: dts: omap5: board-common: fix wrong SMPS6 (VDD-DDR3) voltage DDR3L is usually specified as JEDEC standard 1.35V(1.28V~1.45V) & 1.5V(1.425V~1.575V) Therefore setting smps6 regulator to 1.2V is definitively below minimum. It appears that real world chips are more forgiving than data sheets indicate, but let's set the regulator right. Note: a board that uses other voltages (DDR with 1.5V) can overwrite by referencing &smps6_reg. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 60a33c4b7b82..4caadb253249 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -476,8 +476,8 @@ smps6_reg: smps6 { /* VDD_DDR3 - over VDD_SMPS6 */ regulator-name = "smps6"; - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; + regulator-min-microvolt = <1350000>; + regulator-max-microvolt = <1350000>; regulator-always-on; regulator-boot-on; }; -- cgit v1.2.3-59-g8ed1b From 6ff1a25318ebf688ef9593fe09cd449f6fb4ad31 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 14 Nov 2016 21:46:47 +0100 Subject: ALSA: usb-audio: Fix use-after-free of usb_device at disconnect The usb-audio driver implements the deferred device disconnection for the device in use. In this mode, the disconnection callback returns immediately while the actual ALSA card object removal happens later when all files get closed. As Shuah reported, this code flow, however, leads to a use-after-free, detected by KASAN: BUG: KASAN: use-after-free in snd_usb_audio_free+0x134/0x160 [snd_usb_audio] at addr ffff8801c863ce10 Write of size 8 by task pulseaudio/2244 Call Trace: [] dump_stack+0x67/0x94 [] kasan_object_err+0x21/0x70 [] kasan_report_error+0x1fa/0x4e0 [] ? kasan_slab_free+0x87/0xb0 [] __asan_report_store8_noabort+0x43/0x50 [] ? snd_usb_audio_free+0x134/0x160 [snd_usb_audio] [] snd_usb_audio_free+0x134/0x160 [snd_usb_audio] [] snd_usb_audio_dev_free+0x31/0x40 [snd_usb_audio] [] __snd_device_free+0x12a/0x210 [] snd_device_free_all+0x85/0xd0 [] release_card_device+0x34/0x130 [] device_release+0x76/0x1e0 [] kobject_release+0x107/0x370 ..... Object at ffff8801c863cc80, in cache kmalloc-2048 size: 2048 Allocated: [] save_stack_trace+0x2b/0x50 [] save_stack+0x46/0xd0 [] kasan_kmalloc+0xad/0xe0 [] kmem_cache_alloc_trace+0xfa/0x240 [] usb_alloc_dev+0x57/0xc90 [] hub_event+0xf1d/0x35f0 .... Freed: [] save_stack_trace+0x2b/0x50 [] save_stack+0x46/0xd0 [] kasan_slab_free+0x71/0xb0 [] kfree+0xd9/0x280 [] usb_release_dev+0xde/0x110 [] device_release+0x76/0x1e0 .... It's the code trying to clear drvdata of the assigned usb_device where the usb_device itself was already released in usb_release_dev() after the disconnect callback. This patch fixes it by checking whether the code path is via the disconnect callback, i.e. chip->shutdown flag is set. Fixes: 79289e24194a ('ALSA: usb-audio: Refer to chip->usb_id for quirks...') Reported-and-tested-by: Shuah Khan Cc: # v4.6+ Signed-off-by: Takashi Iwai --- sound/usb/card.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/card.c b/sound/usb/card.c index 9e5276d6dda0..2ddc034673a8 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -315,7 +315,8 @@ static int snd_usb_audio_free(struct snd_usb_audio *chip) snd_usb_endpoint_free(ep); mutex_destroy(&chip->mutex); - dev_set_drvdata(&chip->dev->dev, NULL); + if (!atomic_read(&chip->shutdown)) + dev_set_drvdata(&chip->dev->dev, NULL); kfree(chip); return 0; } -- cgit v1.2.3-59-g8ed1b From 6ca595a70bc46e1a0eea3ee0681360f41555bfd9 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Mon, 14 Nov 2016 11:19:02 -0800 Subject: mailbox: PCC: Fix lockdep warning when request PCC channel This patch fixes the lockdep warning below DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)) ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1 at linux-next/kernel/locking/lockdep.c:2876 lockdep_trace_alloc+0xe0/0xf0 Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.8.0-11756-g86c5152 #46 ... Call trace: Exception stack(0xffff8007da837890 to 0xffff8007da8379c0) 7880: ffff8007da834000 0001000000000000 78a0: ffff8007da837a70 ffff0000081111a0 00000000600000c5 000000000000003d 78c0: 9374bc6a7f3c7832 0000000000381878 ffff000009db7ab8 000000000000002f 78e0: ffff00000811aabc ffff000008be2548 ffff8007da837990 ffff00000811adf8 7900: ffff8007da834000 00000000024080c0 00000000000000c0 ffff000009021000 7920: 0000000000000000 0000000000000000 ffff000008c8f7c8 ffff8007da579810 7940: 000000000000002f ffff8007da858000 0000000000000000 0000000000000001 7960: 0000000000000001 0000000000000000 ffff00000811a468 0000000000000002 7980: 656c62617369645f 0000000000038187 00000000000000ee ffff8007da837850 79a0: ffff000009db50c0 ffff000009db569d 0000000000000006 ffff000089db568f [] lockdep_trace_alloc+0xe0/0xf0 [] __kmalloc_track_caller+0x50/0x250 [] devres_alloc_node+0x28/0x60 [] devm_request_threaded_irq+0x50/0xe0 [] pcc_mbox_request_channel+0x110/0x170 [] acpi_cppc_processor_probe+0x264/0x414 [] __acpi_processor_start+0x28/0xa0 [] acpi_processor_start+0x44/0x54 [] driver_probe_device+0x1fc/0x2b0 [] __driver_attach+0xb4/0xc0 [] bus_for_each_dev+0x5c/0xa0 [] driver_attach+0x20/0x30 [] bus_add_driver+0x110/0x230 [] driver_register+0x60/0x100 [] acpi_processor_driver_init+0x2c/0xb0 [] do_one_initcall+0x38/0x130 [] kernel_init_freeable+0x210/0x2b4 [] kernel_init+0x10/0x110 [] ret_from_fork+0x10/0x50 It's because the spinlock inside pcc_mbox_request_channel() is kept too long. This patch releases spinlock before request_irq() and free_irq() to fix this issue as spinlock is only needed to protect the channel data. Signed-off-by: Hoan Tran Reviewed-by: Prashanth Prakash Signed-off-by: Rafael J. Wysocki --- drivers/mailbox/pcc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 08c87fadca8c..1f32688c312d 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -65,6 +65,7 @@ #include #include #include +#include #include "mailbox.h" @@ -267,6 +268,8 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) chan->txdone_method |= TXDONE_BY_ACK; + spin_unlock_irqrestore(&chan->lock, flags); + if (pcc_doorbell_irq[subspace_id] > 0) { int rc; @@ -275,12 +278,11 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, if (unlikely(rc)) { dev_err(dev, "failed to register PCC interrupt %d\n", pcc_doorbell_irq[subspace_id]); + pcc_mbox_free_channel(chan); chan = ERR_PTR(rc); } } - spin_unlock_irqrestore(&chan->lock, flags); - return chan; } EXPORT_SYMBOL_GPL(pcc_mbox_request_channel); @@ -304,20 +306,19 @@ void pcc_mbox_free_channel(struct mbox_chan *chan) return; } + if (pcc_doorbell_irq[id] > 0) + devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan); + spin_lock_irqsave(&chan->lock, flags); chan->cl = NULL; chan->active_req = NULL; if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) chan->txdone_method = TXDONE_BY_POLL; - if (pcc_doorbell_irq[id] > 0) - devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan); - spin_unlock_irqrestore(&chan->lock, flags); } EXPORT_SYMBOL_GPL(pcc_mbox_free_channel); - /** * pcc_send_data - Called from Mailbox Controller code. Used * here only to ring the channel doorbell. The PCC client -- cgit v1.2.3-59-g8ed1b From 977c1f9c8c022d0173181766b34a0db3705265a4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 7 Nov 2016 15:14:20 -0800 Subject: ftrace: Ignore FTRACE_FL_DISABLED while walking dyn_ftrace records ftrace_shutdown() checks for sanity of ftrace records and if dyn_ftrace->flags is not zero, it will warn. It can happen that 'flags' are set to FTRACE_FL_DISABLED at this point, since some module was loaded, but before ftrace_module_enable() cleared the flags for this module. In other words the module.c is doing: ftrace_module_init(mod); // calls ftrace_update_code() that sets flags=FTRACE_FL_DISABLED ... // here ftrace_shutdown() is called that warns, since err = prepare_coming_module(mod); // didn't have a chance to clear FTRACE_FL_DISABLED Fix it by ignoring disabled records. It's similar to what __ftrace_hash_rec_update() is already doing. Link: http://lkml.kernel.org/r/1478560460-3818619-1-git-send-email-ast@fb.com Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Signed-off-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2050a7652a86..326498baab83 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2763,7 +2763,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) struct dyn_ftrace *rec; do_for_each_ftrace_rec(pg, rec) { - if (FTRACE_WARN_ON_ONCE(rec->flags)) + if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED)) pr_warn(" %pS flags:%lx\n", (void *)rec->ip, rec->flags); } while_for_each_ftrace_rec(); -- cgit v1.2.3-59-g8ed1b From 546fece4eae871f033925ccf0ff2b740725ae915 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 14 Nov 2016 16:31:49 -0500 Subject: ftrace: Add more checks for FTRACE_FL_DISABLED in processing ip records When a module is first loaded and its function ip records are added to the ftrace list of functions to modify, they are set to DISABLED, as their text is still in a read only state. When the module is fully loaded, and can be updated, the flag is cleared, and if their's any functions that should be tracing them, it is updated at that moment. But there's several locations that do record accounting and should ignore records that are marked as disabled, or they can cause issues. Alexei already fixed one location, but others need to be addressed. Cc: stable@vger.kernel.org Fixes: b7ffffbb46f2 "ftrace: Add infrastructure for delayed enabling of module functions" Reported-by: Alexei Starovoitov Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 326498baab83..da87b3cba5b3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1862,6 +1862,10 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, /* Update rec->flags */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + /* We need to update only differences of filter_hash */ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); @@ -1884,6 +1888,10 @@ rollback: /* Roll back what we did above */ do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (rec == end) goto err_out; @@ -2397,6 +2405,10 @@ void __weak ftrace_replace_code(int enable) return; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + failed = __ftrace_replace_code(rec, enable); if (failed) { ftrace_bug(failed, rec); @@ -3598,6 +3610,10 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod) goto out_unlock; do_for_each_ftrace_rec(pg, rec) { + + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) { ret = enter_record(hash, rec, clear_filter); if (ret < 0) { @@ -3793,6 +3809,9 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (!ftrace_match_record(rec, &func_g, NULL, 0)) continue; @@ -4685,6 +4704,9 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer) do_for_each_ftrace_rec(pg, rec) { + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (ftrace_match_record(rec, &func_g, NULL, 0)) { /* if it is in the array */ exists = false; -- cgit v1.2.3-59-g8ed1b From 60f8339eb388df8a46f8eb4282ff0e15f08f218c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 12 Nov 2016 15:01:09 +0100 Subject: gpio: do not double-check direction on sleeping chips When locking a GPIO line as IRQ, we go to lengths to double-check that the line is really set as input before marking it as used for IRQ. This is not good on GPIO chips that can sleep, because this function is called in IRQ-safe context. Just skip this if it can't be checked quickly. Currently this happens on sleeping expanders such as STMPE or TC3589x: BUG: scheduling while atomic: swapper/1/0x00000002 Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.9.0-rc1+ #38 Hardware name: Nomadik STn8815 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__schedule_bug+0x54/0x80) [] (__schedule_bug) from [] (__schedule+0x3a0/0x460) [] (__schedule) from [] (schedule+0x54/0xb8) (...) This patch fixes that problem and relies on the direction read from the chip when it was added. Cc: stable@vger.kernel.org Fixes: 9c10280d85c1 ("gpio: flush direction status in gpiochip_lock_as_irq()") Cc: Patrice Chotard Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 93ed0e00c578..868128a676ba 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2737,8 +2737,11 @@ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset) if (IS_ERR(desc)) return PTR_ERR(desc); - /* Flush direction if something changed behind our back */ - if (chip->get_direction) { + /* + * If it's fast: flush the direction setting if something changed + * behind our back + */ + if (!chip->can_sleep && chip->get_direction) { int dir = chip->get_direction(chip, offset); if (dir) -- cgit v1.2.3-59-g8ed1b From 220a04f0e53276eb3da666174bcf97489fd8644e Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 14 Nov 2016 15:10:29 +0100 Subject: gpio: tc3589x: fix up .get_direction() The bit in the TC3589x direction register is 0 for input and 1 for output, but the gpiolib expects the reverse. Fix up the logic. Cc: stable@vger.kernel.org Fixes: 14063d71e5e6 ("gpio: tc3589x: add .get_direction() and small cleanup") Signed-off-by: Linus Walleij --- drivers/gpio/gpio-tc3589x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 5a5a6cb00eea..d6e21f1a70a9 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -97,7 +97,7 @@ static int tc3589x_gpio_get_direction(struct gpio_chip *chip, if (ret < 0) return ret; - return !!(ret & BIT(pos)); + return !(ret & BIT(pos)); } static int tc3589x_gpio_set_single_ended(struct gpio_chip *chip, -- cgit v1.2.3-59-g8ed1b From c0a36013639b06760f7c2c21a8387eac855432e1 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 15 Nov 2016 15:28:33 +1100 Subject: powerpc/64: Fix setting of AIL in hypervisor mode Commit d3cbff1b5 "powerpc: Put exception configuration in a common place" broke the setting of the AIL bit (which enables taking exceptions with the MMU still on) on all processors, moving it incorrectly to a function called only on the boot CPU. This was correct for the guest case but not when running in hypervisor mode. This fixes it by partially reverting that commit, putting the setting back in cpu_ready_for_interrupts() Fixes: d3cbff1b5a90 ("powerpc: Put exception configuration in a common place") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7ac8e6eaab5b..8d586cff8a41 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -226,17 +226,25 @@ static void __init configure_exceptions(void) if (firmware_has_feature(FW_FEATURE_OPAL)) opal_configure_cores(); - /* Enable AIL if supported, and we are in hypervisor mode */ - if (early_cpu_has_feature(CPU_FTR_HVMODE) && - early_cpu_has_feature(CPU_FTR_ARCH_207S)) { - unsigned long lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); - } + /* AIL on native is done in cpu_ready_for_interrupts() */ } } static void cpu_ready_for_interrupts(void) { + /* + * Enable AIL if supported, and we are in hypervisor mode. This + * is called once for every processor. + * + * If we are not in hypervisor mode the job is done once for + * the whole partition in configure_exceptions(). + */ + if (early_cpu_has_feature(CPU_FTR_HVMODE) && + early_cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } + /* Set IR and DR in PACA MSR */ get_paca()->kernel_msr = MSR_KERNEL; } -- cgit v1.2.3-59-g8ed1b From 5bf7b6e86f29f064979d7b3e6dd21c5dd1feb855 Mon Sep 17 00:00:00 2001 From: Loic Pallardy Date: Tue, 15 Nov 2016 09:47:00 +0100 Subject: ARM: dts: STiH410-b2260: Fix typo in spi0 chipselect definition Change cs-gpio to cs-gpios. Signed-off-by: Loic Pallardy Acked-by: Patrice Chotard --- arch/arm/boot/dts/stih410-b2260.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/stih410-b2260.dts b/arch/arm/boot/dts/stih410-b2260.dts index ef2ff2f518f6..7fb507fcba7e 100644 --- a/arch/arm/boot/dts/stih410-b2260.dts +++ b/arch/arm/boot/dts/stih410-b2260.dts @@ -74,7 +74,7 @@ /* Low speed expansion connector */ spi0: spi@9844000 { label = "LS-SPI0"; - cs-gpio = <&pio30 3 0>; + cs-gpios = <&pio30 3 0>; status = "okay"; }; -- cgit v1.2.3-59-g8ed1b From 59c3b76cc61d1d676f965c192cc7969aa5cb2744 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 18 Aug 2016 09:10:44 +0200 Subject: fuse: fix fuse_write_end() if zero bytes were copied If pos is at the beginning of a page and copied is zero then page is not zeroed but is marked uptodate. Fix by skipping everything except unlock/put of page if zero bytes were copied. Reported-by: Al Viro Fixes: 6b12c1b37e55 ("fuse: Implement write_begin/write_end callbacks") Cc: # v3.15+ Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index abc66a6237fd..2401c5dabb2a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1985,6 +1985,10 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = page->mapping->host; + /* Haven't copied anything? Skip zeroing, size extending, dirtying. */ + if (!copied) + goto unlock; + if (!PageUptodate(page)) { /* Zero any unwritten bytes at the end of the page */ size_t endoff = (pos + copied) & ~PAGE_MASK; @@ -1995,6 +1999,8 @@ static int fuse_write_end(struct file *file, struct address_space *mapping, fuse_write_update_size(inode, pos + copied); set_page_dirty(page); + +unlock: unlock_page(page); put_page(page); -- cgit v1.2.3-59-g8ed1b From c8eaf3479e521e973eb2d4111b8ee8f5b7b564ab Mon Sep 17 00:00:00 2001 From: Filip Matusiak Date: Wed, 2 Nov 2016 10:04:26 +0100 Subject: mac80211: Ignore VHT IE from peer with wrong rx_mcs_map This is a workaround for VHT-enabled STAs which break the spec and have the VHT-MCS Rx map filled in with value 3 for all eight spacial streams, an example is AR9462 in AP mode. As per spec, in section 22.1.1 Introduction to the VHT PHY A VHT STA shall support at least single spactial stream VHT-MCSs 0 to 7 (transmit and receive) in all supported channel widths. Some devices in STA mode will get firmware assert when trying to associate, examples are QCA9377 & QCA6174. Packet example of broken VHT Cap IE of AR9462: Tag: VHT Capabilities (IEEE Std 802.11ac/D3.1) Tag Number: VHT Capabilities (IEEE Std 802.11ac/D3.1) (191) Tag length: 12 VHT Capabilities Info: 0x00000000 VHT Supported MCS Set Rx MCS Map: 0xffff .... .... .... ..11 = Rx 1 SS: Not Supported (0x0003) .... .... .... 11.. = Rx 2 SS: Not Supported (0x0003) .... .... ..11 .... = Rx 3 SS: Not Supported (0x0003) .... .... 11.. .... = Rx 4 SS: Not Supported (0x0003) .... ..11 .... .... = Rx 5 SS: Not Supported (0x0003) .... 11.. .... .... = Rx 6 SS: Not Supported (0x0003) ..11 .... .... .... = Rx 7 SS: Not Supported (0x0003) 11.. .... .... .... = Rx 8 SS: Not Supported (0x0003) ...0 0000 0000 0000 = Rx Highest Long GI Data Rate (in Mb/s, 0 = subfield not in use): 0x0000 Tx MCS Map: 0xffff ...0 0000 0000 0000 = Tx Highest Long GI Data Rate (in Mb/s, 0 = subfield not in use): 0x0000 Signed-off-by: Filip Matusiak Signed-off-by: Johannes Berg --- net/mac80211/vht.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index ee715764a828..6832bf6ab69f 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -270,6 +270,22 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); } + /* + * This is a workaround for VHT-enabled STAs which break the spec + * and have the VHT-MCS Rx map filled in with value 3 for all eight + * spacial streams, an example is AR9462. + * + * As per spec, in section 22.1.1 Introduction to the VHT PHY + * A VHT STA shall support at least single spactial stream VHT-MCSs + * 0 to 7 (transmit and receive) in all supported channel widths. + */ + if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) { + vht_cap->vht_supported = false; + sdata_info(sdata, "Ignoring VHT IE from %pM due to invalid rx_mcs_map\n", + sta->addr); + return; + } + /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: -- cgit v1.2.3-59-g8ed1b From 6c18a6b4e79953ba38bc110e1e42ac45a951b25f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 3 Nov 2016 12:12:47 +0100 Subject: Revert "mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE" This reverts commit c68df2e7be0c1238ea3c281fd744a204ef3b15a0. __sta_info_recalc_tim turns into a no-op if local->ops->set_tim is not set. This prevents the beacon TIM bit from being set for all drivers that do not implement this op (almost all of them), thus thoroughly essential AP mode powersave functionality. Cc: Emmanuel Grumbach Fixes: c68df2e7be0c ("mac80211: allow using AP_LINK_PS with mac80211-generated TIM IE") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 78e9ecbc96e6..8e05032689f0 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (!local->ops->set_tim) + if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) return; if (sta->dead) -- cgit v1.2.3-59-g8ed1b From 8fdd136f2200e6b7237e7e48453f4a591d768e3e Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Mon, 31 Oct 2016 11:28:40 -0700 Subject: cfg80211: add bitrate for 20MHz MCS 9 Some drivers (ath10k) report MCS 9 @ 20MHz, which technically isn't defined. To get more meaningful value than 0 out of this however, just extrapolate a bitrate from ratio of MCS 7 and 9 in channels where it is allowed. Signed-off-by: Thomas Pedersen [add a comment about it in the code] Signed-off-by: Johannes Berg --- net/wireless/util.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 5ea12afc7706..659b507b347d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1158,7 +1158,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) 58500000, 65000000, 78000000, - 0, + /* not in the spec, but some devices use this: */ + 86500000, }, { 13500000, 27000000, -- cgit v1.2.3-59-g8ed1b From c1f4c9ede3c799da9f920c1df9ce524145781637 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 Nov 2016 10:27:52 +0100 Subject: mac80211: update A-MPDU flag on tx dequeue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence number counter is used to derive the starting sequence number. Since that counter is updated on tx dequeue, the A-MPDU flag needs to be up to date at the tme of dequeue as well. This patch prevents sending more A-MPDU frames after the session has been terminated and also ensures that aggregation starts right after the session has been established Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1c56abc49627..d08a8492a846 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3426,6 +3426,11 @@ begin: goto begin; } + if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) + info->flags |= IEEE80211_TX_CTL_AMPDU; + else + info->flags &= ~IEEE80211_TX_CTL_AMPDU; + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { struct sta_info *sta = container_of(txq->sta, struct sta_info, sta); -- cgit v1.2.3-59-g8ed1b From fff712cbe38b6d4e211df9c22aabcfd9739c1c2a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 Nov 2016 10:27:53 +0100 Subject: mac80211: remove bogus skb vif assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The call to ieee80211_txq_enqueue overwrites the vif pointer with the codel enqueue time, so setting it just before that call makes no sense. Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d08a8492a846..fb73e86bdf41 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1501,7 +1501,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct fq *fq = &local->fq; struct ieee80211_vif *vif; struct txq_info *txqi; @@ -1526,8 +1525,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local, if (!txqi) return false; - info->control.vif = vif; - spin_lock_bh(&fq->lock); ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock); -- cgit v1.2.3-59-g8ed1b From a786f96da0d657bf8bd56d8eebb3f31cc45605bb Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 4 Nov 2016 10:27:54 +0100 Subject: mac80211: fix A-MSDU aggregation with fast-xmit + txq MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A-MSDU aggregation alters the QoS header after a frame has been enqueued, so it needs to be ready before enqueue and not overwritten again afterwards Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Felix Fietkau Acked-by: Toke Høiland-Jørgensen Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index fb73e86bdf41..bd5f4be89435 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3210,7 +3210,6 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - *ieee80211_get_qos_ctl(hdr) = tid; hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; @@ -3335,6 +3334,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; + } + __skb_queue_head_init(&tx.skbs); tx.flags = IEEE80211_TX_UNICAST; -- cgit v1.2.3-59-g8ed1b From 4fb7f8af1f4c14a2a6cee7c9ff0cf999d918c72d Mon Sep 17 00:00:00 2001 From: Benjamin Beichler Date: Fri, 11 Nov 2016 17:37:56 +0100 Subject: mac80211_hwsim: fix beacon delta calculation Due to the cast from uint32_t to int64_t, a wrong next beacon timing is calculated and effectively the beacon timer stops working. This is especially bad for 802.11s mesh networks, because discovery breaks without beacons. Signed-off-by: Benjamin Beichler Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 431f13b4faf6..d3bad5779376 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -826,7 +826,7 @@ static void mac80211_hwsim_set_tsf(struct ieee80211_hw *hw, data->bcn_delta = do_div(delta, bcn_int); } else { data->tsf_offset -= delta; - data->bcn_delta = -do_div(delta, bcn_int); + data->bcn_delta = -(s64)do_div(delta, bcn_int); } } -- cgit v1.2.3-59-g8ed1b From 24c66dfd569c4744fc43aea638155ad2dc1499d8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 15 Nov 2016 13:55:59 +0000 Subject: ARM: fix backtrace Recent kernels have changed their behaviour to be more inconsistent when handling printk continuations. With todays kernels, the output looks sane on the console, but dmesg splits individual printk()s which do not have the KERN_CONT prefix into separate lines. Since the assembly code is not trivial to add the KERN_CONT, and we ideally want to avoid using KERN_CONT (as multiple printk()s can race between different threads), convert the assembly dumping the register values to C code, and have the C code build the output a line at a time before dumping to the console. This avoids the KERN_CONT issue, and also avoids situations where the output is intermixed with other console activity. Signed-off-by: Russell King --- arch/arm/kernel/traps.c | 20 ++++++++++++++++++++ arch/arm/lib/backtrace.S | 37 +++---------------------------------- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index bc698383e822..9688ec0c6ef4 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -74,6 +74,26 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs)); } +void dump_backtrace_stm(u32 *stack, u32 instruction) +{ + char str[80], *p; + unsigned int x; + int reg; + + for (reg = 10, x = 0, p = str; reg >= 0; reg--) { + if (instruction & BIT(reg)) { + p += sprintf(p, " r%d:%08x", reg, *stack--); + if (++x == 6) { + x = 0; + p = str; + printk("%s\n", str); + } + } + } + if (p != str) + printk("%s\n", str); +} + #ifndef CONFIG_ARM_UNWIND /* * Stack pointers should always be within the kernels view of diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index fab5a50503ae..7d7952e5a3b1 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -10,6 +10,7 @@ * 27/03/03 Ian Molton Clean up CONFIG_CPU * */ +#include #include #include .text @@ -83,13 +84,13 @@ for_each_frame: tst frame, mask @ Check for address exceptions teq r3, r1, lsr #11 ldreq r0, [frame, #-8] @ get sp subeq r0, r0, #4 @ point at the last arg - bleq .Ldumpstm @ dump saved registers + bleq dump_backtrace_stm @ dump saved registers 1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} ldr r3, .Ldsi @ instruction exists, teq r3, r1, lsr #11 subeq r0, frame, #16 - bleq .Ldumpstm @ dump saved registers + bleq dump_backtrace_stm @ dump saved registers teq sv_fp, #0 @ zero saved fp means beq no_frame @ no further frames @@ -112,38 +113,6 @@ ENDPROC(c_backtrace) .long 1004b, 1006b .popsection -#define instr r4 -#define reg r5 -#define stack r6 - -.Ldumpstm: stmfd sp!, {instr, reg, stack, r7, lr} - mov stack, r0 - mov instr, r1 - mov reg, #10 - mov r7, #0 -1: mov r3, #1 - ARM( tst instr, r3, lsl reg ) - THUMB( lsl r3, reg ) - THUMB( tst instr, r3 ) - beq 2f - add r7, r7, #1 - teq r7, #6 - moveq r7, #0 - adr r3, .Lcr - addne r3, r3, #1 @ skip newline - ldr r2, [stack], #-4 - mov r1, reg - adr r0, .Lfp - bl printk -2: subs reg, reg, #1 - bpl 1b - teq r7, #0 - adrne r0, .Lcr - blne printk - ldmfd sp!, {instr, reg, stack, r7, pc} - -.Lfp: .asciz " r%d:%08x%s" -.Lcr: .asciz "\n" .Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" .align .Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc} -- cgit v1.2.3-59-g8ed1b From 544457fa278216c5fcea6a16e9b2ee8aadaca0ca Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 1 Nov 2016 21:58:36 +0100 Subject: ARM: 8624/1: proc-v7m.S: fix init section name There is no .text.init sections. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/mm/proc-v7m.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index f6d333f09bfe..8dea61640cc1 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -96,7 +96,7 @@ ENTRY(cpu_cm7_proc_fin) ret lr ENDPROC(cpu_cm7_proc_fin) - .section ".text.init", #alloc, #execinstr + .section ".init.text", #alloc, #execinstr __v7m_cm7_setup: mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) -- cgit v1.2.3-59-g8ed1b From 256ff1cf6b44cba9c9c2059f4516259e9319a808 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 15 Nov 2016 14:00:53 +0100 Subject: ARM: 8628/1: dma-mapping: preallocate DMA-debug hash tables in core_initcall fs_initcall is definitely too late to initialize DMA-debug hash tables, because some drivers might get probed and use DMA mapping framework already in core_initcall. Late initialization of DMA-debug results in false warning about accessing memory, that was not allocated, like this one: ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at lib/dma-debug.c:1104 check_unmap+0xa1c/0xe50 exynos-sysmmu 10a60000.sysmmu: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x000000006ebd0000] [size=16384 bytes] Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc5-00028-g39dde3d-dirty #44 Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x84/0xa0) [] (dump_stack) from [] (__warn+0x14c/0x180) [] (__warn) from [] (warn_slowpath_fmt+0x48/0x50) [] (warn_slowpath_fmt) from [] (check_unmap+0xa1c/0xe50) [] (check_unmap) from [] (debug_dma_unmap_page+0x98/0xc8) [] (debug_dma_unmap_page) from [] (exynos_iommu_domain_free+0x158/0x380) [] (exynos_iommu_domain_free) from [] (iommu_domain_free+0x34/0x60) [] (iommu_domain_free) from [] (release_iommu_mapping+0x30/0xb8) [] (release_iommu_mapping) from [] (arm_iommu_release_mapping+0x4c/0x50) [] (arm_iommu_release_mapping) from [] (s5p_mfc_probe+0x640/0x80c) [] (s5p_mfc_probe) from [] (platform_drv_probe+0x70/0x148) [] (platform_drv_probe) from [] (driver_probe_device+0x12c/0x6b0) [] (driver_probe_device) from [] (__driver_attach+0x128/0x17c) [] (__driver_attach) from [] (bus_for_each_dev+0x88/0xc8) [] (bus_for_each_dev) from [] (driver_attach+0x34/0x58) [] (driver_attach) from [] (bus_add_driver+0x18c/0x32c) [] (bus_add_driver) from [] (driver_register+0x98/0x148) [] (driver_register) from [] (__platform_driver_register+0x58/0x74) [] (__platform_driver_register) from [] (s5p_mfc_driver_init+0x1c/0x20) [] (s5p_mfc_driver_init) from [] (do_one_initcall+0x64/0x258) [] (do_one_initcall) from [] (kernel_init_freeable+0x3d0/0x4d0) [] (kernel_init_freeable) from [] (kernel_init+0x18/0x134) [] (kernel_init) from [] (ret_from_fork+0x14/0x3c) ---[ end trace dc54c54bd3581296 ]--- This patch moves initialization of DMA-debug to core_initcall. This is safe from the initialization perspective. dma_debug_do_init() internally calls debugfs functions and debugfs also gets initialised at core_initcall(), and that is earlier than arch code in the link order, so it will get initialized just before the DMA-debug. Reported-by: Seung-Woo Kim Signed-off-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab4f74536057..ab7710002ba6 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1167,7 +1167,7 @@ static int __init dma_debug_do_init(void) dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } -fs_initcall(dma_debug_do_init); +core_initcall(dma_debug_do_init); #ifdef CONFIG_ARM_DMA_USE_IOMMU -- cgit v1.2.3-59-g8ed1b From c6a385539175ebc603da53aafb7753d39089f32e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 14 Nov 2016 19:41:31 +0100 Subject: kbuild: Steal gcc's pie from the very beginning So Sebastian turned off the PIE for kernel builds but that was too late - Kbuild.include already uses KBUILD_CFLAGS and trying to disable gcc options with, say cc-disable-warning, fails: gcc -D__KERNEL__ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs ... -Wno-sign-compare -fno-asynchronous-unwind-tables -Wframe-address -c -x c /dev/null -o .31392.tmp /dev/null:1:0: error: code model kernel does not support PIC mode because that returns an error and we can't disable the warning. For example in this case: KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) which leads to gcc issuing all those warnings again. So let's turn off PIE/PIC at the earliest possible moment, when we declare KBUILD_CFLAGS so that cc-disable-warning picks it up too. Also, we need the $(call cc-option ...) because -fno-PIE is supported since gcc v3.4 and our lowest supported gcc version is 3.2 right now. Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Cc: Ben Hutchings Cc: Sebastian Andrzej Siewior Signed-off-by: Michal Marek --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 58fc5d935ce6..77ac3f88ec37 100644 --- a/Makefile +++ b/Makefile @@ -399,11 +399,12 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -fno-common \ -Werror-implicit-function-declaration \ -Wno-format-security \ - -std=gnu89 + -std=gnu89 $(call cc-option,-fno-PIE) + KBUILD_AFLAGS_KERNEL := KBUILD_CFLAGS_KERNEL := -KBUILD_AFLAGS := -D__ASSEMBLY__ +KBUILD_AFLAGS := -D__ASSEMBLY__ $(call cc-option,-fno-PIE) KBUILD_AFLAGS_MODULE := -DMODULE KBUILD_CFLAGS_MODULE := -DMODULE KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds @@ -622,8 +623,6 @@ include arch/$(SRCARCH)/Makefile KBUILD_CFLAGS += $(call cc-option,-fno-delete-null-pointer-checks,) KBUILD_CFLAGS += $(call cc-disable-warning,maybe-uninitialized,) KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,) -KBUILD_CFLAGS += $(call cc-option,-fno-PIE) -KBUILD_AFLAGS += $(call cc-option,-fno-PIE) ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION KBUILD_CFLAGS += $(call cc-option,-ffunction-sections,) -- cgit v1.2.3-59-g8ed1b From f6c365fad1034c66f9969d1435ffad9102f966bb Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Mon, 14 Nov 2016 17:06:49 +0800 Subject: net: ethernet: Fix SGMII unable to switch speed and autonego failure TSE PCS SGMII ethernet has an issue where switching speed doesn't work caused by a faulty register macro offset. This fixes the issue. Signed-off-by: Jia Jie Ho Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c index 2920e2ee3864..489ef146201e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c @@ -63,8 +63,8 @@ #define TSE_PCS_SGMII_LINK_TIMER_0 0x0D40 #define TSE_PCS_SGMII_LINK_TIMER_1 0x0003 #define TSE_PCS_SW_RESET_TIMEOUT 100 -#define TSE_PCS_USE_SGMII_AN_MASK BIT(2) -#define TSE_PCS_USE_SGMII_ENA BIT(1) +#define TSE_PCS_USE_SGMII_AN_MASK BIT(1) +#define TSE_PCS_USE_SGMII_ENA BIT(0) #define SGMII_ADAPTER_CTRL_REG 0x00 #define SGMII_ADAPTER_DISABLE 0x0001 -- cgit v1.2.3-59-g8ed1b From cb434658a8ff151c221a9ac1d44fb6788100cd0d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Nov 2016 11:39:08 -0500 Subject: drm/amdgpu/powerplay: drop a redundant NULL check Left over from an earlier rev of the patch. Acked-by: Colin Ian King Cc: Dan Carpenter Cc: Colin King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index b0c929dd8beb..13f2b705ea49 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1469,8 +1469,6 @@ static int smu7_get_evv_voltages(struct pp_hwmgr *hwmgr) table_info->vddgfx_lookup_table, vv_id, &sclk)) { if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_ClockStretcher)) { - if (table_info == NULL) - return -EINVAL; sclk_table = table_info->vdd_dep_on_sclk; for (j = 1; j < sclk_table->count; j++) { -- cgit v1.2.3-59-g8ed1b From 1da2c326e43b0834105993d13610647337bbad67 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 11 Nov 2016 11:24:29 +0800 Subject: drm/amdgpu:fix vpost_needed routine 1,cleanup description/comments 2,for FIJI & passthrough, force post when smc fw version below 22.15 3,for other cases, follow regular rules Signed-off-by: Monk Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7ca07e7b25c1..3161d77bf299 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -658,12 +658,10 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return false; if (amdgpu_passthrough(adev)) { - /* for FIJI: In whole GPU pass-through virtualization case - * old smc fw won't clear some registers (e.g. MEM_SIZE, BIOS_SCRATCH) - * so amdgpu_card_posted return false and driver will incorrectly skip vPost. - * but if we force vPost do in pass-through case, the driver reload will hang. - * whether doing vPost depends on amdgpu_card_posted if smc version is above - * 00160e00 for FIJI. + /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot + * some old smc fw still need driver do vPost otherwise gpu hang, while + * those smc fw version above 22.15 doesn't have this flaw, so we force + * vpost executed for smc version below 22.15 */ if (adev->asic_type == CHIP_FIJI) { int err; @@ -674,22 +672,11 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); - if (fw_ver >= 0x00160e00) - return !amdgpu_card_posted(adev); + if (fw_ver < 0x00160e00) + return true; } - } else { - /* in bare-metal case, amdgpu_card_posted return false - * after system reboot/boot, and return true if driver - * reloaded. - * we shouldn't do vPost after driver reload otherwise GPU - * could hang. - */ - if (amdgpu_card_posted(adev)) - return false; } - - /* we assume vPost is neede for all other cases */ - return true; + return !amdgpu_card_posted(adev); } /** -- cgit v1.2.3-59-g8ed1b From e1fafdcbe0e3e769c6a83317dd845bc99b4fe61d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 10 Oct 2016 06:14:45 -0700 Subject: IB/rdmavt: rdmavt can handle non aligned page maps The initial code for rdmavt carried with it a restriction that was a vestige from the qib driver, that to dma map a page it had to be less than a page size. This is not the case on modern hardware, both qib and hfi1 will be just fine with unaligned map requests. This fixes a 4.8 regression where by an IPoIB transfer of > PAGE_SIZE will hang because the dma map page call always fails. This was introduced after commit 5faba5469522 ("IB/ipoib: Report SG feature regardless of HW UD CSUM capability") added the capability to use SG by default. Rather than override this, the HW supports it, so allow SG. Cc: Stable # 4.8 Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/dma.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index 01f71caa3ac4..f2cefb0d9180 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page, if (WARN_ON(!valid_dma_direction(direction))) return BAD_DMA_ADDRESS; - if (offset + size > PAGE_SIZE) - return BAD_DMA_ADDRESS; - addr = (u64)page_address(page); if (addr) addr += offset; -- cgit v1.2.3-59-g8ed1b From 39eb2795f19233330bc14a8450b4042d784b15a7 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 10 Oct 2016 06:14:50 -0700 Subject: IB/hfi1: Remove redundant sysfs irq affinity entry The IRQ affinity entry is not needed after the irq notifier patch has been added to the hfi1 driver. The irq affinity settings for SDMA engine should be set using the standard /proc/irq// interface. Reviewed-by: Jianxin Xiong Signed-off-by: Tadeusz Struk Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 72 ----------------------------------- drivers/infiniband/hw/hfi1/affinity.h | 4 -- drivers/infiniband/hw/hfi1/sysfs.c | 25 ------------ 3 files changed, 101 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index a26a9a0bfc41..67ea85a56945 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -775,75 +775,3 @@ void hfi1_put_proc_affinity(int cpu) } mutex_unlock(&affinity->lock); } - -int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, - size_t count) -{ - struct hfi1_affinity_node *entry; - cpumask_var_t mask; - int ret, i; - - mutex_lock(&node_affinity.lock); - entry = node_affinity_lookup(dd->node); - - if (!entry) { - ret = -EINVAL; - goto unlock; - } - - ret = zalloc_cpumask_var(&mask, GFP_KERNEL); - if (!ret) { - ret = -ENOMEM; - goto unlock; - } - - ret = cpulist_parse(buf, mask); - if (ret) - goto out; - - if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { - dd_dev_warn(dd, "Invalid CPU mask\n"); - ret = -EINVAL; - goto out; - } - - /* reset the SDMA interrupt affinity details */ - init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, mask); - - /* Reassign the affinity for each SDMA interrupt. */ - for (i = 0; i < dd->num_msix_entries; i++) { - struct hfi1_msix_entry *msix; - - msix = &dd->msix_entries[i]; - if (msix->type != IRQ_SDMA) - continue; - - ret = get_irq_affinity(dd, msix); - - if (ret) - break; - } -out: - free_cpumask_var(mask); -unlock: - mutex_unlock(&node_affinity.lock); - return ret ? ret : strnlen(buf, PAGE_SIZE); -} - -int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) -{ - struct hfi1_affinity_node *entry; - - mutex_lock(&node_affinity.lock); - entry = node_affinity_lookup(dd->node); - - if (!entry) { - mutex_unlock(&node_affinity.lock); - return -EINVAL; - } - - cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); - mutex_unlock(&node_affinity.lock); - return strnlen(buf, PAGE_SIZE); -} diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index b89ea3c0ee1a..42e63316afd1 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -102,10 +102,6 @@ int hfi1_get_proc_affinity(int); /* Release a CPU used by a user process. */ void hfi1_put_proc_affinity(int); -int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf); -int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, - size_t count); - struct hfi1_affinity_node { int node; struct cpu_mask_set def_intr; diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index edba22461a9c..919a5474e651 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -49,7 +49,6 @@ #include "hfi.h" #include "mad.h" #include "trace.h" -#include "affinity.h" /* * Start of per-port congestion control structures and support code @@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device, return ret; } -static ssize_t show_sdma_affinity(struct device *device, - struct device_attribute *attr, char *buf) -{ - struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); - struct hfi1_devdata *dd = dd_from_dev(dev); - - return hfi1_get_sdma_affinity(dd, buf); -} - -static ssize_t store_sdma_affinity(struct device *device, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct hfi1_ibdev *dev = - container_of(device, struct hfi1_ibdev, rdi.ibdev.dev); - struct hfi1_devdata *dd = dd_from_dev(dev); - - return hfi1_set_sdma_affinity(dd, buf, count); -} - /* * end of per-unit (or driver, in some cases, but replicated * per unit) functions @@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL); static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL); static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset); -static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity, - store_sdma_affinity); static struct device_attribute *hfi1_attributes[] = { &dev_attr_hw_rev, @@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = { &dev_attr_boardversion, &dev_attr_tempsense, &dev_attr_chip_reset, - &dev_attr_sdma_affinity, }; int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, -- cgit v1.2.3-59-g8ed1b From d9ac4555fb2bcd6b794aaa0b39acad81111d9f42 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Mon, 10 Oct 2016 06:14:56 -0700 Subject: IB/hfi1: Fix integrity check flags default values Prevent setting up integrity check flags when module is loaded with NO_INTEGRITY capability. Reviewed-by: Dean Luick Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 40 ++++++++++++++++++++++++++------------- drivers/infiniband/hw/hfi1/pio.c | 13 +++---------- drivers/infiniband/hw/hfi1/sdma.c | 19 ++----------------- 3 files changed, 32 insertions(+), 40 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7eef11b316ff..3c06d204bafd 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1848,7 +1848,13 @@ extern struct mutex hfi1_mutex; static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, u16 ctxt_type) { - u64 base_sc_integrity = + u64 base_sc_integrity; + + /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */ + if (HFI1_CAP_IS_KSET(NO_INTEGRITY)) + return 0; + + base_sc_integrity = SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK @@ -1863,7 +1869,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK - | SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK; @@ -1872,18 +1877,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, else base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; - if (is_ax(dd)) - /* turn off send-side job key checks - A0 */ - return base_sc_integrity & - ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + /* turn on send-side job key checks if !A0 */ + if (!is_ax(dd)) + base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + return base_sc_integrity; } static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) { - u64 base_sdma_integrity = + u64 base_sdma_integrity; + + /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */ + if (HFI1_CAP_IS_KSET(NO_INTEGRITY)) + return 0; + + base_sdma_integrity = SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK - | SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK @@ -1895,14 +1905,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) | SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK - | SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK | SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK; - if (is_ax(dd)) - /* turn off send-side job key checks - A0 */ - return base_sdma_integrity & - ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL)) + base_sdma_integrity |= + SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK; + + /* turn on send-side job key checks if !A0 */ + if (!is_ax(dd)) + base_sdma_integrity |= + SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; + return base_sdma_integrity; } diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 50a3a36d9363..d89b8745d4c1 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold) void set_pio_integrity(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - u64 reg = 0; u32 hw_context = sc->hw_context; int type = sc->type; - /* - * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if - * we're snooping. - */ - if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) && - dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE) - reg = hfi1_pkt_default_send_ctxt_mask(dd, type); - - write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg); + write_kctxt_csr(dd, hw_context, + SC(CHECK_ENABLE), + hfi1_pkt_default_send_ctxt_mask(dd, type)); } static u32 get_buffers_allocated(struct send_context *sc) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index fd39bcaa062d..9cbe52d21077 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde) write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg); } -#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ -(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) - -#define SET_STATIC_RATE_CONTROL_SMASK(r) \ -(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) /* * set_sdma_integrity * @@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde) static void set_sdma_integrity(struct sdma_engine *sde) { struct hfi1_devdata *dd = sde->dd; - u64 reg; - - if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY))) - return; - - reg = hfi1_pkt_base_sdma_integrity(dd); - - if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL)) - CLEAR_STATIC_RATE_CONTROL_SMASK(reg); - else - SET_STATIC_RATE_CONTROL_SMASK(reg); - write_sde_csr(sde, SD(CHECK_ENABLE), reg); + write_sde_csr(sde, SD(CHECK_ENABLE), + hfi1_pkt_base_sdma_integrity(dd)); } static void init_sdma_regs( -- cgit v1.2.3-59-g8ed1b From acd7c8fe14938a315f0ac1b92a92375f7226c2fd Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 25 Oct 2016 08:57:55 -0700 Subject: IB/hfi1: Fix an Oops on pci device force remove This patch fixes an Oops on device unbind, when the device is used by a PSM user process. PSM processes access device resources which are freed on device removal. Similar protection exists in uverbs in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence a separate protection is required for PSM clients. Cc: Jason Gunthorpe Reviewed-by: Ira Weiny Reviewed-by: Dean Luick Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 5 +++++ drivers/infiniband/hw/hfi1/file_ops.c | 19 ++++++++++++++++--- drivers/infiniband/hw/hfi1/hfi.h | 4 ++++ drivers/infiniband/hw/hfi1/init.c | 21 +++++++++++++++++++-- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9bf5f23544d4..799215255b49 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; + init_completion(&dd->user_comp); + + /* The user refcount starts with one to inidicate an active device */ + atomic_set(&dd->user_refcount, 1); + goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 677efa0e8cd6..bd786b7bd30b 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) struct hfi1_devdata, user_cdev); + if (!atomic_inc_not_zero(&dd->user_refcount)) + return -ENXIO; + /* Just take a ref now. Not all opens result in a context assign */ kobject_get(&dd->kobj); @@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; atomic_inc(&fd->mm->mm_count); - } + fp->private_data = fd; + } else { + fp->private_data = NULL; + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); - fp->private_data = fd; + return -ENOMEM; + } - return fd ? 0 : -ENOMEM; + return 0; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) done: mmdrop(fdata->mm); kobject_put(&dd->kobj); + + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3c06d204bafd..368e96c109a5 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1174,6 +1174,10 @@ struct hfi1_devdata { spinlock_t aspm_lock; /* Number of verbs contexts which have disabled ASPM */ atomic_t aspm_disabled_cnt; + /* Keeps track of user space clients */ + atomic_t user_refcount; + /* Used to wait for outstanding user space clients before dev removal */ + struct completion user_comp; struct hfi1_affinity *affinity; struct rhashtable sdma_rht; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 60db61536fed..e28a6b633ea9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1538,12 +1538,31 @@ bail: return ret; } +static void wait_for_clients(struct hfi1_devdata *dd) +{ + /* + * Remove the device init value and complete the device if there is + * no clients or wait for active clients to finish. + */ + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + + wait_for_completion(&dd->user_comp); +} + static void remove_one(struct pci_dev *pdev) { struct hfi1_devdata *dd = pci_get_drvdata(pdev); /* close debugfs files before ib unregister */ hfi1_dbg_ibdev_exit(&dd->verbs_dev); + + /* remove the /dev hfi1 interface */ + hfi1_device_remove(dd); + + /* wait for existing user space clients to finish */ + wait_for_clients(dd); + /* unregister from IB core */ hfi1_unregister_ib_device(dd); @@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev) /* wait until all of our (qsfp) queue_work() calls complete */ flush_workqueue(ib_wq); - hfi1_device_remove(dd); - postinit_cleanup(dd); } -- cgit v1.2.3-59-g8ed1b From 83fb4af6800deb4f3d19b297df6148cda5c016de Mon Sep 17 00:00:00 2001 From: Krzysztof Blaszkowski Date: Mon, 17 Oct 2016 04:19:24 -0700 Subject: IB/hfi1: Return ENODEV for unsupported PCI device ids. Clean up device type checking. Reviewed-by: Dean Luick Signed-off-by: Krzysztof Blaszkowski Signed-off-by: Tymoteusz Kielan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e28a6b633ea9..baea53f862f9 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1402,7 +1402,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; - struct hfi1_devdata *dd = ERR_PTR(-EINVAL); + struct hfi1_devdata *dd; struct hfi1_pportdata *ppd; /* First, lock the non-writable module parameters */ @@ -1461,26 +1461,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto bail; - /* - * Do device-specific initialization, function table setup, dd - * allocation, etc. - */ - switch (ent->device) { - case PCI_DEVICE_ID_INTEL0: - case PCI_DEVICE_ID_INTEL1: - dd = hfi1_init_dd(pdev, ent); - break; - default: + if (!(ent->device == PCI_DEVICE_ID_INTEL0 || + ent->device == PCI_DEVICE_ID_INTEL1)) { hfi1_early_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n", ent->device); ret = -ENODEV; + goto clean_bail; } - if (IS_ERR(dd)) + /* + * Do device-specific initialization, function table setup, dd + * allocation, etc. + */ + dd = hfi1_init_dd(pdev, ent); + + if (IS_ERR(dd)) { ret = PTR_ERR(dd); - if (ret) goto clean_bail; /* error already printed */ + } ret = create_workqueues(dd); if (ret) -- cgit v1.2.3-59-g8ed1b From 4dfe7cceb2bfd98783b4966d7c881a7552932d31 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Mon, 17 Oct 2016 04:19:41 -0700 Subject: IB/hfi1: Fix a potential memory leak in hfi1_create_ctxts() In the function hfi1_create_ctxts the array "dd->rcd" is allocated and then populated with allocated resources in a loop. Previously, if error happened during the loop, only resource allocated in the current iteration would be freed. The array itself would then be freed, leaving the resources that were allocated in previous iterations and referenced by the array elements in limbo. This patch makes sure all allocated resources are freed before freeing the array "dd->rcd". Also the resource allocation now takes account of the numa node the device is attached to. Reviewed-by: Tadeusz Struk Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index baea53f862f9..e27b65dbe293 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) struct hfi1_ctxtdata *rcd; ppd = dd->pport + (i % dd->num_pports); + + /* dd->rcd[i] gets assigned inside the callee */ rcd = hfi1_create_ctxtdata(ppd, i, dd->node); if (!rcd) { dd_dev_err(dd, @@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) if (!rcd->sc) { dd_dev_err(dd, "Unable to allocate kernel send context, failing\n"); - dd->rcd[rcd->ctxt] = NULL; - hfi1_free_ctxtdata(dd, rcd); goto nomem; } @@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) if (ret < 0) { dd_dev_err(dd, "Failed to setup kernel receive context, failing\n"); - sc_free(rcd->sc); - dd->rcd[rcd->ctxt] = NULL; - hfi1_free_ctxtdata(dd, rcd); ret = -EFAULT; goto bail; } @@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd) nomem: ret = -ENOMEM; bail: + if (dd->rcd) { + for (i = 0; i < dd->num_rcv_contexts; ++i) + hfi1_free_ctxtdata(dd, dd->rcd[i]); + } kfree(dd->rcd); dd->rcd = NULL; return ret; @@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, dd->num_rcv_contexts - dd->first_user_ctxt) kctxt_ngroups = (dd->rcv_entries.nctxt_extra - (dd->num_rcv_contexts - dd->first_user_ctxt)); - rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); + rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa); if (rcd) { u32 rcvtids, max_entries; -- cgit v1.2.3-59-g8ed1b From eacc830f95c0d8c5cbbda1bdba2ddc8f14bc248d Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Mon, 17 Oct 2016 04:19:52 -0700 Subject: IB/hfi1: Remove leftover snoop references A few snoop related variables were missed in the snoop/capture removal to get out of staging. Go back and clean those up too. Reviewed-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 18 ++--------- drivers/infiniband/hw/hfi1/hfi.h | 40 ++--------------------- drivers/infiniband/hw/hfi1/trace_rx.h | 60 ----------------------------------- 3 files changed, 4 insertions(+), 114 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 799215255b49..859341a25e56 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf) /* leave shared count at zero for both global and VL15 */ write_global_credit(dd, vau, vl15buf, 0); - /* We may need some credits for another VL when sending packets - * with the snoop interface. Dividing it down the middle for VL15 - * and VL0 should suffice. - */ - if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) { - write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1) - << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); - write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1) - << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT); - } else { - write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf - << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); - } + write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf + << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT); } /* @@ -9915,9 +9904,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd) u32 mask = ~((1U << ppd->lmc) - 1); u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1); - if (dd->hfi1_snoop.mode_flag) - dd_dev_info(dd, "Set lid/lmc while snooping"); - c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK); c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 368e96c109a5..93e2fcebd083 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -367,26 +367,6 @@ struct hfi1_packet { u8 etype; }; -/* - * Private data for snoop/capture support. - */ -struct hfi1_snoop_data { - int mode_flag; - struct cdev cdev; - struct device *class_dev; - /* protect snoop data */ - spinlock_t snoop_lock; - struct list_head queue; - wait_queue_head_t waitq; - void *filter_value; - int (*filter_callback)(void *hdr, void *data, void *value); - u64 dcc_cfg; /* saved value of DCC Cfg register */ -}; - -/* snoop mode_flag values */ -#define HFI1_PORT_SNOOP_MODE 1U -#define HFI1_PORT_CAPTURE_MODE 2U - struct rvt_sge_state; /* @@ -1104,8 +1084,6 @@ struct hfi1_devdata { char *portcntrnames; size_t portcntrnameslen; - struct hfi1_snoop_data hfi1_snoop; - struct err_info_rcvport err_info_rcvport; struct err_info_constraint err_info_rcv_constraint; struct err_info_constraint err_info_xmit_constraint; @@ -1141,8 +1119,8 @@ struct hfi1_devdata { rhf_rcv_function_ptr normal_rhf_rcv_functions[8]; /* - * Handlers for outgoing data so that snoop/capture does not - * have to have its hooks in the send path + * Capability to have different send engines simply by changing a + * pointer value. */ send_routine process_pio_send; send_routine process_dma_send; @@ -1225,8 +1203,6 @@ struct hfi1_devdata *hfi1_lookup(int unit); extern u32 hfi1_cpulist_count; extern unsigned long *hfi1_cpulist; -extern unsigned int snoop_drop_send; -extern unsigned int snoop_force_capture; int hfi1_init(struct hfi1_devdata *, int); int hfi1_count_units(int *npresentp, int *nupp); int hfi1_count_active_units(void); @@ -1561,13 +1537,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); void reset_link_credits(struct hfi1_devdata *dd); void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu); -int snoop_recv_handler(struct hfi1_packet *packet); -int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); -int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, - u64 pbc); -void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, - u64 pbc, const void *from, size_t count); int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc); static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd) @@ -1803,8 +1772,6 @@ int kdeth_process_expected(struct hfi1_packet *packet); int kdeth_process_eager(struct hfi1_packet *packet); int process_receive_invalid(struct hfi1_packet *packet); -extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8]; - void update_sge(struct rvt_sge_state *ss, u32 length); /* global module parameter variables */ @@ -1831,9 +1798,6 @@ extern struct mutex hfi1_mutex; #define DRIVER_NAME "hfi1" #define HFI1_USER_MINOR_BASE 0 #define HFI1_TRACE_MINOR 127 -#define HFI1_DIAGPKT_MINOR 128 -#define HFI1_DIAG_MINOR_BASE 129 -#define HFI1_SNOOP_CAPTURE_BASE 200 #define HFI1_NMINORS 255 #define PCI_VENDOR_ID_INTEL 0x8086 diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 11e02b228922..f77e59fb43fe 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate, ) ); -#define SNOOP_PRN \ - "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \ - "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]" - -TRACE_EVENT(snoop_capture, - TP_PROTO(struct hfi1_devdata *dd, - int hdr_len, - struct ib_header *hdr, - int data_len, - void *data), - TP_ARGS(dd, hdr_len, hdr, data_len, data), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd) - __field(u16, slid) - __field(u16, dlid) - __field(u32, qpn) - __field(u8, opcode) - __field(u8, sl) - __field(u16, pkey) - __field(u32, hdr_len) - __field(u32, data_len) - __field(u8, lnh) - __dynamic_array(u8, raw_hdr, hdr_len) - __dynamic_array(u8, raw_pkt, data_len) - ), - TP_fast_assign( - struct ib_other_headers *ohdr; - - __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); - if (__entry->lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else - ohdr = &hdr->u.l.oth; - DD_DEV_ASSIGN(dd); - __entry->slid = be16_to_cpu(hdr->lrh[3]); - __entry->dlid = be16_to_cpu(hdr->lrh[1]); - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; - __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; - __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; - __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; - __entry->hdr_len = hdr_len; - __entry->data_len = data_len; - memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len); - memcpy(__get_dynamic_array(raw_pkt), data, data_len); - ), - TP_printk( - "[%s] " SNOOP_PRN, - __get_str(dev), - __entry->slid, - __entry->dlid, - __entry->qpn, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->sl, - __entry->pkey, - __entry->hdr_len, - __entry->data_len - ) -); - #endif /* __HFI1_TRACE_RX_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3-59-g8ed1b From 26ea2544ddbe8855cb251e41ff3641c61655a15f Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 17 Oct 2016 04:19:58 -0700 Subject: IB/hfi1: Clean up unused argument hfi1_pcie_ddinit takes the PCI device id as an argument but never uses it. Clean it up. Reviewed-by: Dennis Dalessandro Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- drivers/infiniband/hw/hfi1/hfi.h | 3 +-- drivers/infiniband/hw/hfi1/pcie.c | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 859341a25e56..156ddf8f3dca 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -14449,7 +14449,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, * Any error printing is already done by the init code. * On return, we have the chip mapped. */ - ret = hfi1_pcie_ddinit(dd, pdev, ent); + ret = hfi1_pcie_ddinit(dd, pdev); if (ret < 0) goto bail_free; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 93e2fcebd083..d906cf08504f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1736,8 +1736,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); void hfi1_pcie_cleanup(struct pci_dev *); -int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *, - const struct pci_device_id *); +int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *); void hfi1_pcie_ddcleanup(struct hfi1_devdata *); void hfi1_pcie_flr(struct hfi1_devdata *); int pcie_speeds(struct hfi1_devdata *); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 89c68da1c273..4ac8f330c5cb 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev) * fields required to re-initialize after a chip reset, or for * various other purposes */ -int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev, - const struct pci_device_id *ent) +int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) { unsigned long len; resource_size_t addr; -- cgit v1.2.3-59-g8ed1b From f0f98f74c91c68502e97e0d5526aa4e81b40b28a Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Mon, 17 Oct 2016 04:20:04 -0700 Subject: IB/hfi1: Delete unused lock The lock is an unused vestige from qib. Remove it. Reviewed-by: Mike Marciniszyn Signed-off-by: Easwar Hariharan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/hfi.h | 2 -- drivers/infiniband/hw/hfi1/init.c | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index d906cf08504f..cc87fd4e534b 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -593,8 +593,6 @@ struct hfi1_pportdata { struct mutex hls_lock; u32 host_link_state; - spinlock_t sdma_alllock ____cacheline_aligned_in_smp; - u32 lstate; /* logical link state */ /* these are the "32 bit" regs */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index e27b65dbe293..0f82eebc4b9e 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -507,7 +507,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); mutex_init(&ppd->hls_lock); - spin_lock_init(&ppd->sdma_alllock); spin_lock_init(&ppd->qsfp_info.qsfp_lock); ppd->qsfp_info.ppd = ppd; -- cgit v1.2.3-59-g8ed1b From 458ed666fe14a54dfb6690a1a7f541782d1342c9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Mon, 17 Oct 2016 04:20:09 -0700 Subject: IB/hfi1: Fix rnr_timer addition The new s_rnr_timeout was not properly being set and the code was incorrectly setting a different timer. Found by code inspection. Cc: # 4.7.x Fixes: 08279d5c9424 ("staging/rdma/hfi1: use new RNR timer") Reviewed-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/rc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 8bc5013f39a1..83198a8a8797 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_WAIT_RNR; - qp->s_timer.expires = jiffies + usecs_to_jiffies(to); + priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to); add_timer(&priv->s_rnr_timer); } -- cgit v1.2.3-59-g8ed1b From 11501ab9df687c6f0852719a5165e16cd3eb3c10 Mon Sep 17 00:00:00 2001 From: Krzysztof Blaszkowski Date: Tue, 25 Oct 2016 13:12:11 -0700 Subject: IB/hfi1: Relocate rcvhdrcnt module parameter check. Validate the rcvhdrcnt module parameter in a single function at module load time. This allows proper error reporting. Reviewed-by: Dean Luick Signed-off-by: Krzysztof Blaszkowski Signed-off-by: Tymoteusz Kielan Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 44 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 0f82eebc4b9e..e3b5bc93bc70 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -262,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } rcd->eager_base = base * dd->rcv_entries.group_size; - /* Validate and initialize Rcv Hdr Q variables */ - if (rcvhdrcnt % HDRQ_INCREMENT) { - dd_dev_err(dd, - "ctxt%u: header queue count %d must be divisible by %lu\n", - rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT); - goto bail; - } rcd->rcvhdrq_cnt = rcvhdrcnt; rcd->rcvhdrqentsize = hfi1_hdrq_entsize; /* @@ -1399,6 +1392,29 @@ static void postinit_cleanup(struct hfi1_devdata *dd) hfi1_free_devdata(dd); } +static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt) +{ + if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { + hfi1_early_err(dev, "Receive header queue count too small\n"); + return -EINVAL; + } + + if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { + hfi1_early_err(dev, + "Receive header queue count cannot be greater than %u\n", + HFI1_MAX_HDRQ_EGRBUF_CNT); + return -EINVAL; + } + + if (thecnt % HDRQ_INCREMENT) { + hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n", + thecnt, HDRQ_INCREMENT); + return -EINVAL; + } + + return 0; +} + static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; @@ -1409,18 +1425,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) HFI1_CAP_LOCK(); /* Validate some global module parameters */ - if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) { - hfi1_early_err(&pdev->dev, "Header queue count too small\n"); - ret = -EINVAL; - goto bail; - } - if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) { - hfi1_early_err(&pdev->dev, - "Receive header queue count cannot be greater than %u\n", - HFI1_MAX_HDRQ_EGRBUF_CNT); - ret = -EINVAL; + ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt); + if (ret) goto bail; - } + /* use the encoding function as a sanitization check */ if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) { hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n", -- cgit v1.2.3-59-g8ed1b From 505efe3e46d5eaab726295cd023fb86d5b789d00 Mon Sep 17 00:00:00 2001 From: Jakub Pawlak Date: Tue, 25 Oct 2016 13:12:17 -0700 Subject: IB/hfi1: Fix status error code for unsupported packets Set the status code BAD_L2 when unsupported type of packet is received and dropped. Reviewed-by: Dennis Dalessandro Signed-off-by: Jakub Pawlak Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.h | 3 +++ drivers/infiniband/hw/hfi1/driver.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 92345259a8f4..043fd21dc5f3 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -320,6 +320,9 @@ /* DC_DC8051_CFG_MODE.GENERAL bits */ #define DISABLE_SELF_GUID_CHECK 0x2 +/* Bad L2 frame error code */ +#define BAD_L2_ERR 0x6 + /* * Eager buffer minimum and maximum sizes supported by the hardware. * All power-of-two sizes in between are supported as well. diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 6563e4d38b80..dadd35eedc01 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1360,12 +1360,25 @@ int process_receive_ib(struct hfi1_packet *packet) int process_receive_bypass(struct hfi1_packet *packet) { + struct hfi1_devdata *dd = packet->rcd->dd; + if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); - dd_dev_err(packet->rcd->dd, + dd_dev_err(dd, "Bypass packets are not supported in normal operation. Dropping\n"); - incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors); + incr_cntr64(&dd->sw_rcv_bypass_packet_errors); + if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) { + u64 *flits = packet->ebuf; + + if (flits && !(packet->rhf & RHF_LEN_ERR)) { + dd->err_info_rcvport.packet_flit1 = flits[0]; + dd->err_info_rcvport.packet_flit2 = + packet->tlen > sizeof(flits[0]) ? flits[1] : 0; + } + dd->err_info_rcvport.status_and_code |= + (OPA_EI_STATUS_SMASK | BAD_L2_ERR); + } return RHF_RCV_CONTINUE; } -- cgit v1.2.3-59-g8ed1b From f2d8a0b367e735ab157222ce74a5f2481216c878 Mon Sep 17 00:00:00 2001 From: Dasaratharaman Chandramouli Date: Tue, 25 Oct 2016 13:12:23 -0700 Subject: IB/hfi1: Fix ECN processing in prescan_rxq When processing ECN via the prescan_rxq path, some fields in the packet structure are passed uninitialized. This can potentially cause NULL pointer exceptions during ECN handling. Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Dasaratharaman Chandramouli Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index dadd35eedc01..c5efff29c147 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) dd->rhf_offset; struct rvt_qp *qp; struct ib_header *hdr; - struct ib_other_headers *ohdr; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - hdr = hfi1_get_msgheader(dd, rhf_addr); + packet->hdr = hfi1_get_msgheader(dd, rhf_addr); + hdr = packet->hdr; lnh = be16_to_cpu(hdr->lrh[0]) & 3; - if (lnh == HFI1_LRH_BTH) - ohdr = &hdr->u.oth; - else if (lnh == HFI1_LRH_GRH) - ohdr = &hdr->u.l.oth; - else + if (lnh == HFI1_LRH_BTH) { + packet->ohdr = &hdr->u.oth; + } else if (lnh == HFI1_LRH_GRH) { + packet->ohdr = &hdr->u.l.oth; + packet->rcv_flags |= HFI1_HAS_GRH; + } else { goto next; /* just in case */ + } - bth1 = be32_to_cpu(ohdr->bth[1]); + bth1 = be32_to_cpu(packet->ohdr->bth[1]); is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK)); if (!is_ecn) @@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) /* turn off BECN, FECN */ bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK); - ohdr->bth[1] = cpu_to_be32(bth1); + packet->ohdr->bth[1] = cpu_to_be32(bth1); next: update_ps_mdata(&mdata, rcd); } -- cgit v1.2.3-59-g8ed1b From 09a7908b1ba616eed349d49058ee909907ee0885 Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Tue, 25 Oct 2016 13:12:40 -0700 Subject: IB/hfi1: Prevent hardware counter names from being cut off Increase the size of the buffer that is used to construct per-VL and per-SDMA counter names. Reviewed-by: Dennis Dalessandro Signed-off-by: Jianxin Xiong Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 156ddf8f3dca..24d0820873cf 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -12098,7 +12098,7 @@ static void update_synth_timer(unsigned long opaque) mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } -#define C_MAX_NAME 13 /* 12 chars + one for /0 */ +#define C_MAX_NAME 16 /* 15 chars + one for /0 */ static int init_cntrs(struct hfi1_devdata *dd) { int i, rcv_ctxts, j; -- cgit v1.2.3-59-g8ed1b From 2b16056f845207967a32497f41cf92b57849f934 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Tue, 25 Oct 2016 13:12:46 -0700 Subject: IB/hfi1: Remove incorrect IS_ERR check Remove IS_ERR check from caching code as the function being called does not actually return error pointers. Fixes: f19bd643dbde: "IB/hfi1: Prevent NULL pointer deferences in caching code" Reported-by: Dan Carpenter Reviewed-by: Dean Luick Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index a761f804111e..77697d690f3e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1144,7 +1144,7 @@ static int pin_vector_pages(struct user_sdma_request *req, rb_node = hfi1_mmu_rb_extract(pq->handler, (unsigned long)iovec->iov.iov_base, iovec->iov.iov_len); - if (rb_node && !IS_ERR(rb_node)) + if (rb_node) node = container_of(rb_node, struct sdma_mmu_node, rb); else rb_node = NULL; -- cgit v1.2.3-59-g8ed1b From 24803f38a5c0b6c57ed800b47e695f9ce474bc3a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 14 Nov 2016 16:16:28 +0800 Subject: igmp: do not remove igmp souce list info when set link down In commit 24cf3af3fed5 ("igmp: call ip_mc_clear_src..."), we forgot to remove igmpv3_clear_delrec() in ip_mc_down(), which also called ip_mc_clear_src(). This make us clear all IGMPv3 source filter info after NETDEV_DOWN. Move igmpv3_clear_delrec() to ip_mc_destroy_dev() and then no need ip_mc_clear_src() in ip_mc_destroy_dev(). On the other hand, we should restore back instead of free all source filter info in igmpv3_del_delrec(). Or we will not able to restore IGMPv3 source filter info after NETDEV_UP and NETDEV_POST_TYPE_CHANGE. Fixes: 24cf3af3fed5 ("igmp: call ip_mc_clear_src() only when ...") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 50 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 606cc3e85d2b..15db786d50ed 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -162,7 +162,7 @@ static int unsolicited_report_interval(struct in_device *in_dev) } static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im); -static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr); +static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im); static void igmpv3_clear_delrec(struct in_device *in_dev); static int sf_setstate(struct ip_mc_list *pmc); static void sf_markstate(struct ip_mc_list *pmc); @@ -1130,10 +1130,15 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) spin_unlock_bh(&in_dev->mc_tomb_lock); } -static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) +/* + * restore ip_mc_list deleted records + */ +static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) { struct ip_mc_list *pmc, *pmc_prev; - struct ip_sf_list *psf, *psf_next; + struct ip_sf_list *psf; + struct net *net = dev_net(in_dev->dev); + __be32 multiaddr = im->multiaddr; spin_lock_bh(&in_dev->mc_tomb_lock); pmc_prev = NULL; @@ -1149,16 +1154,26 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr) in_dev->mc_tomb = pmc->next; } spin_unlock_bh(&in_dev->mc_tomb_lock); + + spin_lock_bh(&im->lock); if (pmc) { - for (psf = pmc->tomb; psf; psf = psf_next) { - psf_next = psf->sf_next; - kfree(psf); + im->interface = pmc->interface; + im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + im->sfmode = pmc->sfmode; + if (pmc->sfmode == MCAST_INCLUDE) { + im->tomb = pmc->tomb; + im->sources = pmc->sources; + for (psf = im->sources; psf; psf = psf->sf_next) + psf->sf_crcount = im->crcount; } in_dev_put(pmc->interface); - kfree(pmc); } + spin_unlock_bh(&im->lock); } +/* + * flush ip_mc_list deleted records + */ static void igmpv3_clear_delrec(struct in_device *in_dev) { struct ip_mc_list *pmc, *nextpmc; @@ -1366,7 +1381,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ip_mc_hash_add(in_dev, im); #ifdef CONFIG_IP_MULTICAST - igmpv3_del_delrec(in_dev, im->multiaddr); + igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); if (!in_dev->dead) @@ -1626,8 +1641,12 @@ void ip_mc_remap(struct in_device *in_dev) ASSERT_RTNL(); - for_each_pmc_rtnl(in_dev, pmc) + for_each_pmc_rtnl(in_dev, pmc) { +#ifdef CONFIG_IP_MULTICAST + igmpv3_del_delrec(in_dev, pmc); +#endif igmp_group_added(pmc); + } } /* Device going down */ @@ -1648,7 +1667,6 @@ void ip_mc_down(struct in_device *in_dev) in_dev->mr_gq_running = 0; if (del_timer(&in_dev->mr_gq_timer)) __in_dev_put(in_dev); - igmpv3_clear_delrec(in_dev); #endif ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS); @@ -1688,8 +1706,12 @@ void ip_mc_up(struct in_device *in_dev) #endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for_each_pmc_rtnl(in_dev, pmc) + for_each_pmc_rtnl(in_dev, pmc) { +#ifdef CONFIG_IP_MULTICAST + igmpv3_del_delrec(in_dev, pmc); +#endif igmp_group_added(pmc); + } } /* @@ -1704,13 +1726,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); +#ifdef CONFIG_IP_MULTICAST + igmpv3_clear_delrec(in_dev); +#endif while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - - /* We've dropped the groups in ip_mc_down already */ - ip_mc_clear_src(i); ip_ma_put(i); } } -- cgit v1.2.3-59-g8ed1b From d2042052a0aa6a54f01a0c9e14243ec040b100e2 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 14 Nov 2016 09:27:28 +0100 Subject: stmmac: update the PTP header file This patch is to update this file by using BIT macros, removing not used defines and fixes some typos. Signed-off-by: Giuseppe Cavallaro Acked-by: Rayagond Kokatanur Acked-by: Alexandre TORGUE Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h | 72 ++++++++++++------------ 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h index 4535df37c227..c06938c47af5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -22,51 +22,53 @@ Author: Rayagond Kokatanur ******************************************************************************/ -#ifndef __STMMAC_PTP_H__ -#define __STMMAC_PTP_H__ +#ifndef __STMMAC_PTP_H__ +#define __STMMAC_PTP_H__ -/* IEEE 1588 PTP register offsets */ -#define PTP_TCR 0x0700 /* Timestamp Control Reg */ -#define PTP_SSIR 0x0704 /* Sub-Second Increment Reg */ -#define PTP_STSR 0x0708 /* System Time – Seconds Regr */ -#define PTP_STNSR 0x070C /* System Time – Nanoseconds Reg */ -#define PTP_STSUR 0x0710 /* System Time – Seconds Update Reg */ -#define PTP_STNSUR 0x0714 /* System Time – Nanoseconds Update Reg */ -#define PTP_TAR 0x0718 /* Timestamp Addend Reg */ -#define PTP_TTSR 0x071C /* Target Time Seconds Reg */ -#define PTP_TTNSR 0x0720 /* Target Time Nanoseconds Reg */ -#define PTP_STHWSR 0x0724 /* System Time - Higher Word Seconds Reg */ -#define PTP_TSR 0x0728 /* Timestamp Status */ +#define PTP_GMAC4_OFFSET 0xb00 +#define PTP_GMAC3_X_OFFSET 0x700 -#define PTP_STNSUR_ADDSUB_SHIFT 31 +/* IEEE 1588 PTP register offsets */ +#define PTP_TCR 0x00 /* Timestamp Control Reg */ +#define PTP_SSIR 0x04 /* Sub-Second Increment Reg */ +#define PTP_STSR 0x08 /* System Time – Seconds Regr */ +#define PTP_STNSR 0x0c /* System Time – Nanoseconds Reg */ +#define PTP_STSUR 0x10 /* System Time – Seconds Update Reg */ +#define PTP_STNSUR 0x14 /* System Time – Nanoseconds Update Reg */ +#define PTP_TAR 0x18 /* Timestamp Addend Reg */ -/* PTP TCR defines */ -#define PTP_TCR_TSENA 0x00000001 /* Timestamp Enable */ -#define PTP_TCR_TSCFUPDT 0x00000002 /* Timestamp Fine/Coarse Update */ -#define PTP_TCR_TSINIT 0x00000004 /* Timestamp Initialize */ -#define PTP_TCR_TSUPDT 0x00000008 /* Timestamp Update */ -/* Timestamp Interrupt Trigger Enable */ -#define PTP_TCR_TSTRIG 0x00000010 -#define PTP_TCR_TSADDREG 0x00000020 /* Addend Reg Update */ -#define PTP_TCR_TSENALL 0x00000100 /* Enable Timestamp for All Frames */ -/* Timestamp Digital or Binary Rollover Control */ -#define PTP_TCR_TSCTRLSSR 0x00000200 +#define PTP_STNSUR_ADDSUB_SHIFT 31 +#define PTP_DIGITAL_ROLLOVER_MODE 0x3B9ACA00 /* 10e9-1 ns */ +#define PTP_BINARY_ROLLOVER_MODE 0x80000000 /* ~0.466 ns */ +/* PTP Timestamp control register defines */ +#define PTP_TCR_TSENA BIT(0) /* Timestamp Enable */ +#define PTP_TCR_TSCFUPDT BIT(1) /* Timestamp Fine/Coarse Update */ +#define PTP_TCR_TSINIT BIT(2) /* Timestamp Initialize */ +#define PTP_TCR_TSUPDT BIT(3) /* Timestamp Update */ +#define PTP_TCR_TSTRIG BIT(4) /* Timestamp Interrupt Trigger Enable */ +#define PTP_TCR_TSADDREG BIT(5) /* Addend Reg Update */ +#define PTP_TCR_TSENALL BIT(8) /* Enable Timestamp for All Frames */ +#define PTP_TCR_TSCTRLSSR BIT(9) /* Digital or Binary Rollover Control */ /* Enable PTP packet Processing for Version 2 Format */ -#define PTP_TCR_TSVER2ENA 0x00000400 +#define PTP_TCR_TSVER2ENA BIT(10) /* Enable Processing of PTP over Ethernet Frames */ -#define PTP_TCR_TSIPENA 0x00000800 +#define PTP_TCR_TSIPENA BIT(11) /* Enable Processing of PTP Frames Sent over IPv6-UDP */ -#define PTP_TCR_TSIPV6ENA 0x00001000 +#define PTP_TCR_TSIPV6ENA BIT(12) /* Enable Processing of PTP Frames Sent over IPv4-UDP */ -#define PTP_TCR_TSIPV4ENA 0x00002000 +#define PTP_TCR_TSIPV4ENA BIT(13) /* Enable Timestamp Snapshot for Event Messages */ -#define PTP_TCR_TSEVNTENA 0x00004000 +#define PTP_TCR_TSEVNTENA BIT(14) /* Enable Snapshot for Messages Relevant to Master */ -#define PTP_TCR_TSMSTRENA 0x00008000 +#define PTP_TCR_TSMSTRENA BIT(15) /* Select PTP packets for Taking Snapshots */ -#define PTP_TCR_SNAPTYPSEL_1 0x00010000 +#define PTP_TCR_SNAPTYPSEL_1 GENMASK(17, 16) /* Enable MAC address for PTP Frame Filtering */ -#define PTP_TCR_TSENMACADDR 0x00040000 +#define PTP_TCR_TSENMACADDR BIT(18) + +/* SSIR defines */ +#define PTP_SSIR_SSINC_MASK 0xff +#define GMAC4_PTP_SSIR_SSINC_SHIFT 16 -#endif /* __STMMAC_PTP_H__ */ +#endif /* __STMMAC_PTP_H__ */ -- cgit v1.2.3-59-g8ed1b From ba1ffd74df74a9efa5290f87632a0ed55f1aa387 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 14 Nov 2016 09:27:29 +0100 Subject: stmmac: fix PTP support for GMAC4 Due to bad management of the descriptors, when use ptp4l, kernel panics as shown below: ----------------------------------------------------------- Unable to handle kernel NULL pointer dereference at virtual address 000001ac ... Internal error: Oops: 17 [#1] SMP ARM ... Hardware name: STi SoC with Flattened Device Tree task: c0c05e80 task.stack: c0c00000 PC is at dwmac4_wrback_get_tx_timestamp_status+0x0/0xc LR is at stmmac_tx_clean+0x2f8/0x4d4 ----------------------------------------------------------- In case of GMAC4 the extended descriptor pointers were used for getting the timestamp. These are NULL for this HW, and the normal ones must be used. The PTP also had problems on this chip due to the bad register management and issues on the algo adopted to setup the PTP and getting the timestamp values from the descriptors. Signed-off-by: Giuseppe Cavallaro Acked-by: Rayagond Kokatanur Acked-by: Alexandre TORGUE Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 5 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 68 ++++++++++++--- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h | 4 + drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 + .../net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c | 43 ++++++++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 97 +++++++++++----------- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 9 +- 7 files changed, 154 insertions(+), 73 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index d3292c4a6eda..6fc214ce2958 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -482,11 +482,12 @@ struct stmmac_ops { /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data); - u32 (*config_sub_second_increment) (void __iomem *ioaddr, u32 clk_rate); + u32 (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock, + int gmac4); int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec); int (*config_addend) (void __iomem *ioaddr, u32 addend); int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec, - int add_sub); + int add_sub, int gmac4); u64(*get_systime) (void __iomem *ioaddr); }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index a1b17cd7886b..2ef2f0c03e76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -204,14 +204,18 @@ static void dwmac4_rd_enable_tx_timestamp(struct dma_desc *p) static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p) { - return (p->des3 & TDES3_TIMESTAMP_STATUS) - >> TDES3_TIMESTAMP_STATUS_SHIFT; + /* Context type from W/B descriptor must be zero */ + if (p->des3 & TDES3_CONTEXT_TYPE) + return -EINVAL; + + /* Tx Timestamp Status is 1 so des0 and des1'll have valid values */ + if (p->des3 & TDES3_TIMESTAMP_STATUS) + return 0; + + return 1; } -/* NOTE: For RX CTX bit has to be checked before - * HAVE a specific function for TX and another one for RX - */ -static u64 dwmac4_wrback_get_timestamp(void *desc, u32 ats) +static inline u64 dwmac4_get_timestamp(void *desc, u32 ats) { struct dma_desc *p = (struct dma_desc *)desc; u64 ns; @@ -223,12 +227,54 @@ static u64 dwmac4_wrback_get_timestamp(void *desc, u32 ats) return ns; } -static int dwmac4_context_get_rx_timestamp_status(void *desc, u32 ats) +static int dwmac4_rx_check_timestamp(void *desc) +{ + struct dma_desc *p = (struct dma_desc *)desc; + u32 own, ctxt; + int ret = 1; + + own = p->des3 & RDES3_OWN; + ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR) + >> RDES3_CONTEXT_DESCRIPTOR_SHIFT); + + if (likely(!own && ctxt)) { + if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff)) + /* Corrupted value */ + ret = -EINVAL; + else + /* A valid Timestamp is ready to be read */ + ret = 0; + } + + /* Timestamp not ready */ + return ret; +} + +static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats) { struct dma_desc *p = (struct dma_desc *)desc; + int ret = -EINVAL; + + /* Get the status from normal w/b descriptor */ + if (likely(p->des3 & TDES3_RS1V)) { + if (likely(p->des1 & RDES1_TIMESTAMP_AVAILABLE)) { + int i = 0; + + /* Check if timestamp is OK from context descriptor */ + do { + ret = dwmac4_rx_check_timestamp(desc); + if (ret < 0) + goto exit; + i++; - return (p->des1 & RDES1_TIMESTAMP_AVAILABLE) - >> RDES1_TIMESTAMP_AVAILABLE_SHIFT; + } while ((ret == 1) || (i < 10)); + + if (i == 10) + ret = -EBUSY; + } + } +exit: + return ret; } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, @@ -373,8 +419,8 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .get_rx_frame_len = dwmac4_wrback_get_rx_frame_len, .enable_tx_timestamp = dwmac4_rd_enable_tx_timestamp, .get_tx_timestamp_status = dwmac4_wrback_get_tx_timestamp_status, - .get_timestamp = dwmac4_wrback_get_timestamp, - .get_rx_timestamp_status = dwmac4_context_get_rx_timestamp_status, + .get_rx_timestamp_status = dwmac4_wrback_get_rx_timestamp_status, + .get_timestamp = dwmac4_get_timestamp, .set_tx_ic = dwmac4_rd_set_tx_ic, .prepare_tx_desc = dwmac4_rd_prepare_tx_desc, .prepare_tso_tx_desc = dwmac4_rd_prepare_tso_tx_desc, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h index 0902a2edeaa9..9736c505211a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h @@ -59,10 +59,13 @@ #define TDES3_CTXT_TCMSSV BIT(26) /* TDES3 Common */ +#define TDES3_RS1V BIT(26) +#define TDES3_RS1V_SHIFT 26 #define TDES3_LAST_DESCRIPTOR BIT(28) #define TDES3_LAST_DESCRIPTOR_SHIFT 28 #define TDES3_FIRST_DESCRIPTOR BIT(29) #define TDES3_CONTEXT_TYPE BIT(30) +#define TDES3_CONTEXT_TYPE_SHIFT 30 /* TDS3 use for both format (read and write back) */ #define TDES3_OWN BIT(31) @@ -117,6 +120,7 @@ #define RDES3_LAST_DESCRIPTOR BIT(28) #define RDES3_FIRST_DESCRIPTOR BIT(29) #define RDES3_CONTEXT_DESCRIPTOR BIT(30) +#define RDES3_CONTEXT_DESCRIPTOR_SHIFT 30 /* RDES3 (read format) */ #define RDES3_BUFFER1_VALID_ADDR BIT(24) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index b15fc55f1b96..4d2a759b8465 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -129,6 +129,7 @@ struct stmmac_priv { int irq_wake; spinlock_t ptp_lock; void __iomem *mmcaddr; + void __iomem *ptpaddr; u32 rx_tail_addr; u32 tx_tail_addr; u32 mss; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index a77f68918010..10d6059b2f26 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -34,21 +34,29 @@ static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data) } static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr, - u32 ptp_clock) + u32 ptp_clock, int gmac4) { u32 value = readl(ioaddr + PTP_TCR); unsigned long data; - /* Convert the ptp_clock to nano second - * formula = (2/ptp_clock) * 1000000000 - * where, ptp_clock = 50MHz. + /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second + * formula = (1/ptp_clock) * 1000000000 + * where ptp_clock is 50MHz if fine method is used to update system */ - data = (2000000000ULL / ptp_clock); + if (value & PTP_TCR_TSCFUPDT) + data = (1000000000ULL / 50000000); + else + data = (1000000000ULL / ptp_clock); /* 0.465ns accuracy */ if (!(value & PTP_TCR_TSCTRLSSR)) data = (data * 1000) / 465; + data &= PTP_SSIR_SSINC_MASK; + + if (gmac4) + data = data << GMAC4_PTP_SSIR_SSINC_SHIFT; + writel(data, ioaddr + PTP_SSIR); return data; @@ -104,14 +112,30 @@ static int stmmac_config_addend(void __iomem *ioaddr, u32 addend) } static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, - int add_sub) + int add_sub, int gmac4) { u32 value; int limit; + if (add_sub) { + /* If the new sec value needs to be subtracted with + * the system time, then MAC_STSUR reg should be + * programmed with (2^32 – ) + */ + if (gmac4) + sec = (100000000ULL - sec); + + value = readl(ioaddr + PTP_TCR); + if (value & PTP_TCR_TSCTRLSSR) + nsec = (PTP_DIGITAL_ROLLOVER_MODE - nsec); + else + nsec = (PTP_BINARY_ROLLOVER_MODE - nsec); + } + writel(sec, ioaddr + PTP_STSUR); - writel(((add_sub << PTP_STNSUR_ADDSUB_SHIFT) | nsec), - ioaddr + PTP_STNSUR); + value = (add_sub << PTP_STNSUR_ADDSUB_SHIFT) | nsec; + writel(value, ioaddr + PTP_STNSUR); + /* issue command to initialize the system time value */ value = readl(ioaddr + PTP_TCR); value |= PTP_TCR_TSUPDT; @@ -134,8 +158,9 @@ static u64 stmmac_get_systime(void __iomem *ioaddr) { u64 ns; + /* Get the TSSS value */ ns = readl(ioaddr + PTP_STNSR); - /* convert sec time value to nanosecond */ + /* Get the TSS and convert sec time value to nanosecond */ ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; return ns; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e2c94ec4edd0..1f9ec02fa7f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -340,18 +340,17 @@ out: /* stmmac_get_tx_hwtstamp - get HW TX timestamps * @priv: driver private structure - * @entry : descriptor index to be used. + * @p : descriptor pointer * @skb : the socket buffer * Description : * This function will read timestamp from the descriptor & pass it to stack. * and also perform some sanity checks. */ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, - unsigned int entry, struct sk_buff *skb) + struct dma_desc *p, struct sk_buff *skb) { struct skb_shared_hwtstamps shhwtstamp; u64 ns; - void *desc = NULL; if (!priv->hwts_tx_en) return; @@ -360,58 +359,55 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv, if (likely(!skb || !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))) return; - if (priv->adv_ts) - desc = (priv->dma_etx + entry); - else - desc = (priv->dma_tx + entry); - /* check tx tstamp status */ - if (!priv->hw->desc->get_tx_timestamp_status((struct dma_desc *)desc)) - return; + if (!priv->hw->desc->get_tx_timestamp_status(p)) { + /* get the valid tstamp */ + ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); - /* get the valid tstamp */ - ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); + memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); + shhwtstamp.hwtstamp = ns_to_ktime(ns); - memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); - shhwtstamp.hwtstamp = ns_to_ktime(ns); - /* pass tstamp to stack */ - skb_tstamp_tx(skb, &shhwtstamp); + netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns); + /* pass tstamp to stack */ + skb_tstamp_tx(skb, &shhwtstamp); + } return; } /* stmmac_get_rx_hwtstamp - get HW RX timestamps * @priv: driver private structure - * @entry : descriptor index to be used. + * @p : descriptor pointer + * @np : next descriptor pointer * @skb : the socket buffer * Description : * This function will read received packet's timestamp from the descriptor * and pass it to stack. It also perform some sanity checks. */ -static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, - unsigned int entry, struct sk_buff *skb) +static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p, + struct dma_desc *np, struct sk_buff *skb) { struct skb_shared_hwtstamps *shhwtstamp = NULL; u64 ns; - void *desc = NULL; if (!priv->hwts_rx_en) return; - if (priv->adv_ts) - desc = (priv->dma_erx + entry); - else - desc = (priv->dma_rx + entry); - - /* exit if rx tstamp is not valid */ - if (!priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) - return; + /* Check if timestamp is available */ + if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) { + /* For GMAC4, the valid timestamp is from CTX next desc. */ + if (priv->plat->has_gmac4) + ns = priv->hw->desc->get_timestamp(np, priv->adv_ts); + else + ns = priv->hw->desc->get_timestamp(p, priv->adv_ts); - /* get valid tstamp */ - ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts); - shhwtstamp = skb_hwtstamps(skb); - memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); - shhwtstamp->hwtstamp = ns_to_ktime(ns); + netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns); + shhwtstamp = skb_hwtstamps(skb); + memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps)); + shhwtstamp->hwtstamp = ns_to_ktime(ns); + } else { + netdev_err(priv->dev, "cannot get RX hw timestamp\n"); + } } /** @@ -598,17 +594,18 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON; if (!priv->hwts_tx_en && !priv->hwts_rx_en) - priv->hw->ptp->config_hw_tstamping(priv->ioaddr, 0); + priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, 0); else { value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR | tstamp_all | ptp_v2 | ptp_over_ethernet | ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en | ts_master_en | snap_type_sel); - priv->hw->ptp->config_hw_tstamping(priv->ioaddr, value); + priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, value); /* program Sub Second Increment reg */ sec_inc = priv->hw->ptp->config_sub_second_increment( - priv->ioaddr, priv->clk_ptp_rate); + priv->ptpaddr, priv->clk_ptp_rate, + priv->plat->has_gmac4); temp = div_u64(1000000000ULL, sec_inc); /* calculate default added value: @@ -618,14 +615,14 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) */ temp = (u64)(temp << 32); priv->default_addend = div_u64(temp, priv->clk_ptp_rate); - priv->hw->ptp->config_addend(priv->ioaddr, + priv->hw->ptp->config_addend(priv->ptpaddr, priv->default_addend); /* initialize system time */ ktime_get_real_ts64(&now); /* lower 32 bits of tv_sec are safe until y2106 */ - priv->hw->ptp->init_systime(priv->ioaddr, (u32)now.tv_sec, + priv->hw->ptp->init_systime(priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec); } @@ -1340,7 +1337,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) priv->dev->stats.tx_packets++; priv->xstats.tx_pkt_n++; } - stmmac_get_tx_hwtstamp(priv, entry, skb); + stmmac_get_tx_hwtstamp(priv, p, skb); } if (likely(priv->tx_skbuff_dma[entry].buf)) { @@ -1486,10 +1483,13 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET | MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET; - if (priv->synopsys_id >= DWMAC_CORE_4_00) + if (priv->synopsys_id >= DWMAC_CORE_4_00) { + priv->ptpaddr = priv->ioaddr + PTP_GMAC4_OFFSET; priv->mmcaddr = priv->ioaddr + MMC_GMAC4_OFFSET; - else + } else { + priv->ptpaddr = priv->ioaddr + PTP_GMAC3_X_OFFSET; priv->mmcaddr = priv->ioaddr + MMC_GMAC3_X_OFFSET; + } dwmac_mmc_intr_all_mask(priv->mmcaddr); @@ -2484,7 +2484,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) if (netif_msg_rx_status(priv)) { void *rx_head; - pr_debug("%s: descriptor ring:\n", __func__); + pr_info(">>>>>> %s: descriptor ring:\n", __func__); if (priv->extend_desc) rx_head = (void *)priv->dma_erx; else @@ -2495,6 +2495,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) while (count < limit) { int status; struct dma_desc *p; + struct dma_desc *np; if (priv->extend_desc) p = (struct dma_desc *)(priv->dma_erx + entry); @@ -2514,9 +2515,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) next_entry = priv->cur_rx; if (priv->extend_desc) - prefetch(priv->dma_erx + next_entry); + np = (struct dma_desc *)(priv->dma_erx + next_entry); else - prefetch(priv->dma_rx + next_entry); + np = priv->dma_rx + next_entry; + + prefetch(np); if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status)) priv->hw->desc->rx_extended_status(&priv->dev->stats, @@ -2568,7 +2571,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) frame_len -= ETH_FCS_LEN; if (netif_msg_rx_status(priv)) { - pr_debug("\tdesc: %p [entry %d] buff=0x%x\n", + pr_info("\tdesc: %p [entry %d] buff=0x%x\n", p, entry, des); if (frame_len > ETH_FRAME_LEN) pr_debug("\tframe size %d, COE: %d\n", @@ -2625,13 +2628,13 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit) DMA_FROM_DEVICE); } - stmmac_get_rx_hwtstamp(priv, entry, skb); - if (netif_msg_pktdata(priv)) { pr_debug("frame received (%dbytes)", frame_len); print_pkt(skb->data, frame_len); } + stmmac_get_rx_hwtstamp(priv, p, np, skb); + stmmac_rx_vlan(priv->dev, skb); skb->protocol = eth_type_trans(skb, priv->dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 1477471f8d44..3eb281d1db08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -54,7 +54,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb) spin_lock_irqsave(&priv->ptp_lock, flags); - priv->hw->ptp->config_addend(priv->ioaddr, addend); + priv->hw->ptp->config_addend(priv->ptpaddr, addend); spin_unlock_irqrestore(&priv->ptp_lock, flags); @@ -89,7 +89,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta) spin_lock_irqsave(&priv->ptp_lock, flags); - priv->hw->ptp->adjust_systime(priv->ioaddr, sec, nsec, neg_adj); + priv->hw->ptp->adjust_systime(priv->ptpaddr, sec, nsec, neg_adj, + priv->plat->has_gmac4); spin_unlock_irqrestore(&priv->ptp_lock, flags); @@ -114,7 +115,7 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts) spin_lock_irqsave(&priv->ptp_lock, flags); - ns = priv->hw->ptp->get_systime(priv->ioaddr); + ns = priv->hw->ptp->get_systime(priv->ptpaddr); spin_unlock_irqrestore(&priv->ptp_lock, flags); @@ -141,7 +142,7 @@ static int stmmac_set_time(struct ptp_clock_info *ptp, spin_lock_irqsave(&priv->ptp_lock, flags); - priv->hw->ptp->init_systime(priv->ioaddr, ts->tv_sec, ts->tv_nsec); + priv->hw->ptp->init_systime(priv->ptpaddr, ts->tv_sec, ts->tv_nsec); spin_unlock_irqrestore(&priv->ptp_lock, flags); -- cgit v1.2.3-59-g8ed1b From ee112c12ebd22baca85812175008ef584250e415 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 14 Nov 2016 09:27:30 +0100 Subject: stmmac: fix PTP type ethtool stats This patch fixes the ethtool stats for PTP frames; previous version does not take care about some message types: i.e. announce, management and signaling. It also provided a broken statistic in case of "No PTP message received". Signed-off-by: Giuseppe Cavallaro Acked-by: Rayagond Kokatanur Acked-by: Alexandre TORGUE Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 19 ++++++++------- drivers/net/ethernet/stmicro/stmmac/descs.h | 20 +++++++++------- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 27 +++++++++++++-------- drivers/net/ethernet/stmicro/stmmac/enh_desc.c | 28 ++++++++++++++-------- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 19 ++++++++------- 5 files changed, 69 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 6fc214ce2958..6d2de4e01f6d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -120,14 +120,17 @@ struct stmmac_extra_stats { unsigned long ip_csum_bypassed; unsigned long ipv4_pkt_rcvd; unsigned long ipv6_pkt_rcvd; - unsigned long rx_msg_type_ext_no_ptp; - unsigned long rx_msg_type_sync; - unsigned long rx_msg_type_follow_up; - unsigned long rx_msg_type_delay_req; - unsigned long rx_msg_type_delay_resp; - unsigned long rx_msg_type_pdelay_req; - unsigned long rx_msg_type_pdelay_resp; - unsigned long rx_msg_type_pdelay_follow_up; + unsigned long no_ptp_rx_msg_type_ext; + unsigned long ptp_rx_msg_type_sync; + unsigned long ptp_rx_msg_type_follow_up; + unsigned long ptp_rx_msg_type_delay_req; + unsigned long ptp_rx_msg_type_delay_resp; + unsigned long ptp_rx_msg_type_pdelay_req; + unsigned long ptp_rx_msg_type_pdelay_resp; + unsigned long ptp_rx_msg_type_pdelay_follow_up; + unsigned long ptp_rx_msg_type_announce; + unsigned long ptp_rx_msg_type_management; + unsigned long ptp_rx_msg_pkt_reserved_type; unsigned long ptp_frame_type; unsigned long ptp_ver; unsigned long timestamp_dropped; diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h index 2e4c171a2b41..e3c86d422109 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs.h @@ -155,14 +155,18 @@ #define ERDES4_L3_L4_FILT_NO_MATCH_MASK GENMASK(27, 26) /* Extended RDES4 message type definitions */ -#define RDES_EXT_NO_PTP 0 -#define RDES_EXT_SYNC 1 -#define RDES_EXT_FOLLOW_UP 2 -#define RDES_EXT_DELAY_REQ 3 -#define RDES_EXT_DELAY_RESP 4 -#define RDES_EXT_PDELAY_REQ 5 -#define RDES_EXT_PDELAY_RESP 6 -#define RDES_EXT_PDELAY_FOLLOW_UP 7 +#define RDES_EXT_NO_PTP 0x0 +#define RDES_EXT_SYNC 0x1 +#define RDES_EXT_FOLLOW_UP 0x2 +#define RDES_EXT_DELAY_REQ 0x3 +#define RDES_EXT_DELAY_RESP 0x4 +#define RDES_EXT_PDELAY_REQ 0x5 +#define RDES_EXT_PDELAY_RESP 0x6 +#define RDES_EXT_PDELAY_FOLLOW_UP 0x7 +#define RDES_PTP_ANNOUNCE 0x8 +#define RDES_PTP_MANAGEMENT 0x9 +#define RDES_PTP_SIGNALING 0xa +#define RDES_PTP_PKT_RESERVED_TYPE 0xf /* Basic descriptor structure for normal and alternate descriptors */ struct dma_desc { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 2ef2f0c03e76..a601f8d43b75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -123,22 +123,29 @@ static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x, x->ipv4_pkt_rcvd++; if (rdes1 & RDES1_IPV6_HEADER) x->ipv6_pkt_rcvd++; - if (message_type == RDES_EXT_SYNC) - x->rx_msg_type_sync++; + + if (message_type == RDES_EXT_NO_PTP) + x->no_ptp_rx_msg_type_ext++; + else if (message_type == RDES_EXT_SYNC) + x->ptp_rx_msg_type_sync++; else if (message_type == RDES_EXT_FOLLOW_UP) - x->rx_msg_type_follow_up++; + x->ptp_rx_msg_type_follow_up++; else if (message_type == RDES_EXT_DELAY_REQ) - x->rx_msg_type_delay_req++; + x->ptp_rx_msg_type_delay_req++; else if (message_type == RDES_EXT_DELAY_RESP) - x->rx_msg_type_delay_resp++; + x->ptp_rx_msg_type_delay_resp++; else if (message_type == RDES_EXT_PDELAY_REQ) - x->rx_msg_type_pdelay_req++; + x->ptp_rx_msg_type_pdelay_req++; else if (message_type == RDES_EXT_PDELAY_RESP) - x->rx_msg_type_pdelay_resp++; + x->ptp_rx_msg_type_pdelay_resp++; else if (message_type == RDES_EXT_PDELAY_FOLLOW_UP) - x->rx_msg_type_pdelay_follow_up++; - else - x->rx_msg_type_ext_no_ptp++; + x->ptp_rx_msg_type_pdelay_follow_up++; + else if (message_type == RDES_PTP_ANNOUNCE) + x->ptp_rx_msg_type_announce++; + else if (message_type == RDES_PTP_MANAGEMENT) + x->ptp_rx_msg_type_management++; + else if (message_type == RDES_PTP_PKT_RESERVED_TYPE) + x->ptp_rx_msg_pkt_reserved_type++; if (rdes1 & RDES1_PTP_PACKET_TYPE) x->ptp_frame_type++; diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index 38f19c99cf59..e75549327c34 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -150,22 +150,30 @@ static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x, x->ipv4_pkt_rcvd++; if (rdes4 & ERDES4_IPV6_PKT_RCVD) x->ipv6_pkt_rcvd++; - if (message_type == RDES_EXT_SYNC) - x->rx_msg_type_sync++; + + if (message_type == RDES_EXT_NO_PTP) + x->no_ptp_rx_msg_type_ext++; + else if (message_type == RDES_EXT_SYNC) + x->ptp_rx_msg_type_sync++; else if (message_type == RDES_EXT_FOLLOW_UP) - x->rx_msg_type_follow_up++; + x->ptp_rx_msg_type_follow_up++; else if (message_type == RDES_EXT_DELAY_REQ) - x->rx_msg_type_delay_req++; + x->ptp_rx_msg_type_delay_req++; else if (message_type == RDES_EXT_DELAY_RESP) - x->rx_msg_type_delay_resp++; + x->ptp_rx_msg_type_delay_resp++; else if (message_type == RDES_EXT_PDELAY_REQ) - x->rx_msg_type_pdelay_req++; + x->ptp_rx_msg_type_pdelay_req++; else if (message_type == RDES_EXT_PDELAY_RESP) - x->rx_msg_type_pdelay_resp++; + x->ptp_rx_msg_type_pdelay_resp++; else if (message_type == RDES_EXT_PDELAY_FOLLOW_UP) - x->rx_msg_type_pdelay_follow_up++; - else - x->rx_msg_type_ext_no_ptp++; + x->ptp_rx_msg_type_pdelay_follow_up++; + else if (message_type == RDES_PTP_ANNOUNCE) + x->ptp_rx_msg_type_announce++; + else if (message_type == RDES_PTP_MANAGEMENT) + x->ptp_rx_msg_type_management++; + else if (message_type == RDES_PTP_PKT_RESERVED_TYPE) + x->ptp_rx_msg_pkt_reserved_type++; + if (rdes4 & ERDES4_PTP_FRAME_TYPE) x->ptp_frame_type++; if (rdes4 & ERDES4_PTP_VER) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 1e06173fc9d7..c5d0142adda2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -115,14 +115,17 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = { STMMAC_STAT(ip_csum_bypassed), STMMAC_STAT(ipv4_pkt_rcvd), STMMAC_STAT(ipv6_pkt_rcvd), - STMMAC_STAT(rx_msg_type_ext_no_ptp), - STMMAC_STAT(rx_msg_type_sync), - STMMAC_STAT(rx_msg_type_follow_up), - STMMAC_STAT(rx_msg_type_delay_req), - STMMAC_STAT(rx_msg_type_delay_resp), - STMMAC_STAT(rx_msg_type_pdelay_req), - STMMAC_STAT(rx_msg_type_pdelay_resp), - STMMAC_STAT(rx_msg_type_pdelay_follow_up), + STMMAC_STAT(no_ptp_rx_msg_type_ext), + STMMAC_STAT(ptp_rx_msg_type_sync), + STMMAC_STAT(ptp_rx_msg_type_follow_up), + STMMAC_STAT(ptp_rx_msg_type_delay_req), + STMMAC_STAT(ptp_rx_msg_type_delay_resp), + STMMAC_STAT(ptp_rx_msg_type_pdelay_req), + STMMAC_STAT(ptp_rx_msg_type_pdelay_resp), + STMMAC_STAT(ptp_rx_msg_type_pdelay_follow_up), + STMMAC_STAT(ptp_rx_msg_type_announce), + STMMAC_STAT(ptp_rx_msg_type_management), + STMMAC_STAT(ptp_rx_msg_pkt_reserved_type), STMMAC_STAT(ptp_frame_type), STMMAC_STAT(ptp_ver), STMMAC_STAT(timestamp_dropped), -- cgit v1.2.3-59-g8ed1b From c7a4e3d8c0d43a4f31f8b2ccf476e5a26eb85142 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Mon, 14 Nov 2016 16:32:52 +0300 Subject: net: arc_emac: annonce IFF_MULTICAST support Multicast support was implemented by commit 775dd682e2b0ec7 ('arc_emac: implement promiscuous mode and multicast filtering'). It can be enabled explicity using 'ifconfig eth0 multicast'. The patch is needed in order to remove explicit configuration as most devices has multicast mode enabled by default. Signed-off-by: Alexander Kochetkov Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index b0da9693f28a..2e4ee86a7e51 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -764,8 +764,6 @@ int arc_emac_probe(struct net_device *ndev, int interface) ndev->netdev_ops = &arc_emac_netdev_ops; ndev->ethtool_ops = &arc_emac_ethtool_ops; ndev->watchdog_timeo = TX_TIMEOUT; - /* FIXME :: no multicast support yet */ - ndev->flags &= ~IFF_MULTICAST; priv = netdev_priv(ndev); priv->dev = dev; -- cgit v1.2.3-59-g8ed1b From d0e3f65b34c528ec2b7d1ba9a620b483f71788d3 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Mon, 14 Nov 2016 16:32:53 +0300 Subject: net: arc_emac: don't pass multicast packets to kernel in non-multicast mode The patch disable capturing multicast packets when multicast mode disabled for ethernet ('ifconfig eth0 -multicast'). In that case no multicast packet will be passed to kernel. Signed-off-by: Alexander Kochetkov Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 2e4ee86a7e51..be865b4dada2 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -460,7 +460,7 @@ static void arc_emac_set_rx_mode(struct net_device *ndev) if (ndev->flags & IFF_ALLMULTI) { arc_reg_set(priv, R_LAFL, ~0); arc_reg_set(priv, R_LAFH, ~0); - } else { + } else if (ndev->flags & IFF_MULTICAST) { struct netdev_hw_addr *ha; unsigned int filter[2] = { 0, 0 }; int bit; @@ -472,6 +472,9 @@ static void arc_emac_set_rx_mode(struct net_device *ndev) arc_reg_set(priv, R_LAFL, filter[0]); arc_reg_set(priv, R_LAFH, filter[1]); + } else { + arc_reg_set(priv, R_LAFL, 0); + arc_reg_set(priv, R_LAFH, 0); } } } -- cgit v1.2.3-59-g8ed1b From 73e2d5e34b6cdd1080038daf3d6d6d744a9eefe6 Mon Sep 17 00:00:00 2001 From: Pablo Neira Date: Mon, 14 Nov 2016 23:40:30 +0100 Subject: udp: restore UDPlite many-cast delivery Honor udptable parameter that is passed to __udp*_lib_mcast_deliver(), otherwise udplite broadcast/multicast use the wrong table and it breaks. Fixes: 2dc41cff7545 ("udp: Use hash2 for long hash1 chains in __udp*_lib_mcast_deliver.") Signed-off-by: Pablo Neira Ayuso Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++--- net/ipv6/udp.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d123d68f4d1d..0de9d5d2b9ae 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1652,10 +1652,10 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, if (use_hash2) { hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & - udp_table.mask; - hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask; + udptable->mask; + hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udp_table.hash2[hash2]; + hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b2ef061e6836..e5056d4873d1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -706,10 +706,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & - udp_table.mask; - hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask; + udptable->mask; + hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: - hslot = &udp_table.hash2[hash2]; + hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); } -- cgit v1.2.3-59-g8ed1b From e88a2766143a27bfe6704b4493b214de4094cf29 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Nov 2016 16:28:42 -0800 Subject: gro_cells: mark napi struct as not busy poll candidates Rolf Neugebauer reported very long delays at netns dismantle. Eric W. Biederman was kind enough to look at this problem and noticed synchronize_net() occurring from netif_napi_del() that was added in linux-4.5 Busy polling makes no sense for tunnels NAPI. If busy poll is used for sessions over tunnels, the poller will need to poll the physical device queue anyway. netif_tx_napi_add() could be used here, but function name is misleading, and renaming it is not stable material, so set NAPI_STATE_NO_BUSY_POLL bit directly. This will avoid inserting gro_cells napi structures in napi_hash[] and avoid the problematic synchronize_net() (per possible cpu) that Rolf reported. Fixes: 93d05d4a320c ("net: provide generic busy polling to all NAPI drivers") Signed-off-by: Eric Dumazet Reported-by: Rolf Neugebauer Reported-by: Eric W. Biederman Acked-by: Cong Wang Tested-by: Rolf Neugebauer Signed-off-by: David S. Miller --- include/net/gro_cells.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index d15214d673b2..2a1abbf8da74 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); __skb_queue_head_init(&cell->napi_skbs); + + set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); + netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); napi_enable(&cell->napi); } -- cgit v1.2.3-59-g8ed1b From 7e75f74a171a8146cc3ee92d5562878b40c25fb5 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 15 Nov 2016 10:39:03 +0100 Subject: rtnetlink: fix rtnl_vfinfo_size The size reported by rtnl_vfinfo_size doesn't match the space used by rtnl_fill_vfinfo. rtnl_vfinfo_size currently doesn't account for the nest attributes used by statistics (added in commit 3b766cd83232), nor for struct ifla_vf_tx_rate (since commit ed616689a3d9, which added ifla_vf_rate to the dump without removing ifla_vf_tx_rate, but replaced ifla_vf_tx_rate with ifla_vf_rate in the size computation). Fixes: 3b766cd83232 ("net/core: Add reading VF statistics through the PF netdevice") Fixes: ed616689a3d9 ("net-next:v4: Add support to configure SR-IOV VF minimum and maximum Tx rate through ip tool") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index db313ec7af32..96f4bf274e30 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -840,18 +840,20 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, if (dev->dev.parent && dev_is_pci(dev->dev.parent) && (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); - size_t size = nla_total_size(sizeof(struct nlattr)); - size += nla_total_size(num_vfs * sizeof(struct nlattr)); + size_t size = nla_total_size(0); size += num_vfs * - (nla_total_size(sizeof(struct ifla_vf_mac)) + - nla_total_size(MAX_VLAN_LIST_LEN * - sizeof(struct nlattr)) + + (nla_total_size(0) + + nla_total_size(sizeof(struct ifla_vf_mac)) + + nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */ nla_total_size(MAX_VLAN_LIST_LEN * sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + + nla_total_size(0) + /* nest IFLA_VF_STATS */ /* IFLA_VF_STATS_RX_PACKETS */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_PACKETS */ -- cgit v1.2.3-59-g8ed1b From b3cfaa31e3851c743d3f9d3441710f7ff6f7e868 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 15 Nov 2016 11:16:35 +0100 Subject: rtnetlink: fix rtnl message size computation for XDP rtnl_xdp_size() only considers the size of the actual payload attribute, and misses the space taken by the attribute used for nesting (IFLA_XDP). Fixes: d1fdd9138682 ("rtnl: add option for setting link xdp prog") Signed-off-by: Sabrina Dubroca Reviewed-by: Brenden Blanco Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 96f4bf274e30..a6529c55ffb7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -901,7 +901,8 @@ static size_t rtnl_port_size(const struct net_device *dev, static size_t rtnl_xdp_size(const struct net_device *dev) { - size_t xdp_size = nla_total_size(1); /* XDP_ATTACHED */ + size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ + nla_total_size(1); /* XDP_ATTACHED */ if (!dev->netdev_ops->ndo_xdp) return 0; -- cgit v1.2.3-59-g8ed1b From bc9db5ad3253c8e17969bd802c47b73e63f125ab Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 11 Nov 2016 19:14:24 +0200 Subject: drm/i915: Assume non-DP++ port if dvo_port is HDMI and there's no AUX ch specified in the VBT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My heuristic for detecting type 1 DVI DP++ adaptors based on the VBT port information apparently didn't survive the reality of buggy VBTs. In this particular case we have a machine with a natice HDMI port, but the VBT tells us it's a DP++ port based on its capabilities. The dvo_port information in VBT does claim that we're dealing with a HDMI port though, but we have other machines which do the same even when they actually have DP++ ports. So that piece of information alone isn't sufficient to tell the two apart. After staring at a bunch of VBTs from various machines, I have to conclude that the only other semi-reliable clue we can use is the presence of the AUX channel in the VBT. On this particular machine AUX channel is specified as zero, whereas on some of the other machines which listed the DP++ port as HDMI have a non-zero AUX channel. I've also seen VBTs which have dvo_port a DP but have a zero AUX channel. I believe those we need to treat as DP ports, so we'll limit the AUX channel check to just the cases where dvo_port is HDMI. If we encounter any more serious failures with this heuristic I think we'll have to have to throw it out entirely. But that could mean that there is a risk of type 1 DVI dongle users getting greeted by a black screen, so I'd rather not go there unless absolutely necessary. v2: Remove the duplicate PORT_A check (Daniel) Fix some typos in the commit message Cc: Daniel Otero Cc: stable@vger.kernel.org Tested-by: Daniel Otero Fixes: d61992565bd3 ("drm/i915: Determine DP++ type 1 DVI adaptor presence based on VBT") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97994 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1478884464-14251-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter (cherry picked from commit 7a17995a3dc8613f778a9e2fd20e870f17789544) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 30 ++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_vbt_defs.h | 3 ++- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 1f8af87c6294..cf2560708e03 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1143,7 +1143,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (!child) return; - aux_channel = child->raw[25]; + aux_channel = child->common.aux_channel; ddc_pin = child->common.ddc_pin; is_dvi = child->common.device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; @@ -1673,7 +1673,8 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) return false; } -bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port) +static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child, + enum port port) { static const struct { u16 dp, hdmi; @@ -1687,22 +1688,35 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum por [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, }, [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, }; - int i; if (port == PORT_A || port >= ARRAY_SIZE(port_mapping)) return false; - if (!dev_priv->vbt.child_dev_num) + if ((p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) != + (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) return false; + if (p_child->common.dvo_port == port_mapping[port].dp) + return true; + + /* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */ + if (p_child->common.dvo_port == port_mapping[port].hdmi && + p_child->common.aux_channel != 0) + return true; + + return false; +} + +bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, + enum port port) +{ + int i; + for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { const union child_device_config *p_child = &dev_priv->vbt.child_dev[i]; - if ((p_child->common.dvo_port == port_mapping[port].dp || - p_child->common.dvo_port == port_mapping[port].hdmi) && - (p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) == - (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) + if (child_dev_is_dp_dual_mode(p_child, port)) return true; } diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 68db9621f1f0..8886cab19f98 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -280,7 +280,8 @@ struct common_child_dev_config { u8 dp_support:1; u8 tmds_support:1; u8 support_reserved:5; - u8 not_common3[12]; + u8 aux_channel; + u8 not_common3[11]; u8 iboost_level; } __packed; -- cgit v1.2.3-59-g8ed1b From 2c8c34167c987e463d62a55384fcec7fa8d03a54 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 29 Sep 2016 12:59:39 +0300 Subject: mfd: lpss: Fix Intel Kaby Lake PCH-H properties There are a few issues on Intel Kaby Lake PCH-H properties added by commit a6a576b78e09 ("mfd: lpss: Add Intel Kaby Lake PCH-H PCI IDs"): - Input clock of I2C controller on Intel Kaby Lake PCH-H is 120 MHz not 133 MHz. This was probably copy-paste error from Intel Broxton I2C properties. - There is no default I2C SDA hold time specified which is used when ACPI doesn't provide it. I got information from Windows driver team that Kaby Lake PCH-H can use the same configuration than Intel Sunrisepoint PCH. - Common HS-UART properties are not used. Fix these by reusing the Sunrisepoint properties on Kaby Lake PCH-H. Fixes: a6a576b78e09 ("mfd: lpss: Add Intel Kaby Lake PCH-H PCI IDs") Reported-by: Xiang A Wang Signed-off-by: Jarkko Nikula Acked-by: Mika Westerberg Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss-pci.c | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 3228fd182a99..9ff243970e93 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -123,19 +123,6 @@ static const struct intel_lpss_platform_info apl_i2c_info = { .properties = apl_i2c_properties, }; -static const struct intel_lpss_platform_info kbl_info = { - .clk_rate = 120000000, -}; - -static const struct intel_lpss_platform_info kbl_uart_info = { - .clk_rate = 120000000, - .clk_con_id = "baudclk", -}; - -static const struct intel_lpss_platform_info kbl_i2c_info = { - .clk_rate = 133000000, -}; - static const struct pci_device_id intel_lpss_pci_ids[] = { /* BXT A-Step */ { PCI_VDEVICE(INTEL, 0x0aac), (kernel_ulong_t)&bxt_i2c_info }, @@ -207,15 +194,15 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0xa161), (kernel_ulong_t)&spt_i2c_info }, { PCI_VDEVICE(INTEL, 0xa166), (kernel_ulong_t)&spt_uart_info }, /* KBL-H */ - { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&kbl_uart_info }, - { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&kbl_uart_info }, - { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&kbl_info }, - { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&kbl_info }, - { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&kbl_i2c_info }, - { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&kbl_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&spt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&spt_uart_info }, + { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&spt_info }, + { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&spt_info }, + { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&spt_i2c_info }, + { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&spt_uart_info }, { } }; MODULE_DEVICE_TABLE(pci, intel_lpss_pci_ids); -- cgit v1.2.3-59-g8ed1b From 274e43edcda6f709aa67e436b3123e45a6270923 Mon Sep 17 00:00:00 2001 From: Azhar Shaikh Date: Wed, 12 Oct 2016 10:12:20 -0700 Subject: mfd: intel-lpss: Do not put device in reset state on suspend Commit 41a3da2b8e163 ("mfd: intel-lpss: Save register context on suspend") saved the register context while going to suspend and also put the device in reset state. Due to the resetting of device, system cannot enter S3/S0ix states when no_console_suspend flag is enabled. The system and serial console both hang. The resetting of device is not needed while going to suspend. Hence remove this code. Cc: stable@vger.kernel.org Fixes: 41a3da2b8e163 ("mfd: intel-lpss: Save register context on suspend") Signed-off-by: Azhar Shaikh Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Lee Jones --- drivers/mfd/intel-lpss.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index 41b113875d64..70c646b0097d 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -502,9 +502,6 @@ int intel_lpss_suspend(struct device *dev) for (i = 0; i < LPSS_PRIV_REG_COUNT; i++) lpss->priv_ctx[i] = readl(lpss->priv + i * 4); - /* Put the device into reset state */ - writel(0, lpss->priv + LPSS_PRIV_RESETS); - return 0; } EXPORT_SYMBOL_GPL(intel_lpss_suspend); -- cgit v1.2.3-59-g8ed1b From 9600702082b29fd3f8a6d744df74ad4c48d4a432 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 17 Oct 2016 10:32:13 +0300 Subject: mfd: intel_soc_pmic_bxtwc: Fix usbc interrupt The wcove USB Type-C driver is currently being flooded with interrupts that are not targeted to it. The reason for that is because all CHRG first level interrupts are mapped to it. This fixes the issue by introducing separate irq for the usbc device, and mapping only USB Type-C PHY interrupts to it. Fixes: 9c6235c86332 ("mfd: intel_soc_pmic_bxtwc: Add bxt_wcove_usbc device") Signed-off-by: Heikki Krogerus Signed-off-by: Lee Jones --- drivers/mfd/intel_soc_pmic_bxtwc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c index 43e54b7e908f..f9a8c5203873 100644 --- a/drivers/mfd/intel_soc_pmic_bxtwc.c +++ b/drivers/mfd/intel_soc_pmic_bxtwc.c @@ -86,6 +86,7 @@ enum bxtwc_irqs_level2 { BXTWC_THRM2_IRQ, BXTWC_BCU_IRQ, BXTWC_ADC_IRQ, + BXTWC_USBC_IRQ, BXTWC_CHGR0_IRQ, BXTWC_CHGR1_IRQ, BXTWC_GPIO0_IRQ, @@ -111,7 +112,8 @@ static const struct regmap_irq bxtwc_regmap_irqs_level2[] = { REGMAP_IRQ_REG(BXTWC_THRM2_IRQ, 2, 0xff), REGMAP_IRQ_REG(BXTWC_BCU_IRQ, 3, 0x1f), REGMAP_IRQ_REG(BXTWC_ADC_IRQ, 4, 0xff), - REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x3f), + REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 5, BIT(5)), + REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x1f), REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 6, 0x1f), REGMAP_IRQ_REG(BXTWC_GPIO0_IRQ, 7, 0xff), REGMAP_IRQ_REG(BXTWC_GPIO1_IRQ, 8, 0x3f), @@ -146,7 +148,7 @@ static struct resource adc_resources[] = { }; static struct resource usbc_resources[] = { - DEFINE_RES_IRQ_NAMED(BXTWC_CHGR0_IRQ, "USBC"), + DEFINE_RES_IRQ(BXTWC_USBC_IRQ), }; static struct resource charger_resources[] = { -- cgit v1.2.3-59-g8ed1b From f40584200bc4af7aa4399635b9ac213c62a13ae7 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 1 Nov 2016 10:22:53 +0100 Subject: mfd: stmpe: Fix RESET regression on STMPE2401 Since commit c4dd1ba355aae2bc3d1213da6c66c53e3c31e028 ("mfd: stmpe: Add reset support for all STMPE variant") we're resetting the STMPE expanders before use. This caused a regression on the STMP2401 on the Nomadik NHK8815: stmpe-i2c 0-0043: stmpe2401 detected, chip id: 0x101 nmk-i2c 101f8000.i2c0: write to slave 0x43 timed out nmk-i2c 101f8000.i2c0: no ack received after address transmission stmpe-i2c 0-0044: stmpe2401 detected, chip id: 0x101 nmk-i2c 101f8000.i2c0: write to slave 0x44 timed out nmk-i2c 101f8000.i2c0: no ack received after address transmission It turns out that we start to poll for the reset bit to go low again too quickly: the STMPE2401 is not yet online and ready to be asked for the status of the RESET bit. By introducing a 10ms delay before starting to hammer the register for information, we get back to normal: stmpe-i2c 0-0043: stmpe2401 detected, chip id: 0x101 stmpe-i2c 0-0044: stmpe2401 detected, chip id: 0x101 Cc: stable@vger.kernel.org Cc: Amelie Delaunay Fixes: c4dd1ba355aa ("mfd: stmpe: Add reset support for all STMPE variant") Signed-off-by: Linus Walleij Acked-by: Patrice Chotard Signed-off-by: Lee Jones --- drivers/mfd/stmpe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c index cfdae8a3d779..b0c7bcdaf5df 100644 --- a/drivers/mfd/stmpe.c +++ b/drivers/mfd/stmpe.c @@ -851,6 +851,8 @@ static int stmpe_reset(struct stmpe *stmpe) if (ret < 0) return ret; + msleep(10); + timeout = jiffies + msecs_to_jiffies(100); while (time_before(jiffies, timeout)) { ret = __stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_SYS_CTRL]); -- cgit v1.2.3-59-g8ed1b From 722f191080de641f023feaa7d5648caf377844f5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 1 Nov 2016 11:38:18 +0100 Subject: mfd: core: Fix device reference leak in mfd_clone_cell Make sure to drop the reference taken by bus_find_device_by_name() before returning from mfd_clone_cell(). Fixes: a9bbba996302 ("mfd: add platform_device sharing support for mfd") Signed-off-by: Johan Hovold Signed-off-by: Lee Jones --- drivers/mfd/mfd-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 3ac486a597f3..c57e407020f1 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -399,6 +399,8 @@ int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones) clones[i]); } + put_device(dev); + return 0; } EXPORT_SYMBOL(mfd_clone_cell); -- cgit v1.2.3-59-g8ed1b From 19ff7fcc76e6911a955742b40f85ba1030ccba5e Mon Sep 17 00:00:00 2001 From: Mike Marshall Date: Wed, 16 Nov 2016 11:52:19 -0500 Subject: orangefs: add .owner to debugfs file_operations Without ".owner = THIS_MODULE" it is possible to crash the kernel by unloading the Orangefs module while someone is reading debugfs files. Signed-off-by: Mike Marshall --- fs/orangefs/orangefs-debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index d484068ca716..38887cc5577f 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -114,6 +114,7 @@ static const struct seq_operations help_debug_ops = { }; const struct file_operations debug_help_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_help_open, .read = seq_read, .release = seq_release, @@ -121,6 +122,7 @@ const struct file_operations debug_help_fops = { }; static const struct file_operations kernel_debug_fops = { + .owner = THIS_MODULE, .open = orangefs_debug_open, .read = orangefs_debug_read, .write = orangefs_debug_write, -- cgit v1.2.3-59-g8ed1b From f23cc643f9baec7f71f2b74692da3cf03abbbfda Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 14 Nov 2016 15:45:36 -0500 Subject: bpf: fix range arithmetic for bpf map access I made some invalid assumptions with BPF_AND and BPF_MOD that could result in invalid accesses to bpf map entries. Fix this up by doing a few things 1) Kill BPF_MOD support. This doesn't actually get used by the compiler in real life and just adds extra complexity. 2) Fix the logic for BPF_AND, don't allow AND of negative numbers and set the minimum value to 0 for positive AND's. 3) Don't do operations on the ranges if they are set to the limits, as they are by definition undefined, and allowing arithmetic operations on those values could make them appear valid when they really aren't. This fixes the testcase provided by Jann as well as a few other theoretical problems. Reported-by: Jann Horn Signed-off-by: Josef Bacik Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 5 ++-- kernel/bpf/verifier.c | 70 +++++++++++++++++++++++++++++--------------- 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7035b997aaa5..6aaf425cebc3 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -14,7 +14,7 @@ * are obviously wrong for any sort of memory access. */ #define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) -#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -1 struct bpf_reg_state { enum bpf_reg_type type; @@ -22,7 +22,8 @@ struct bpf_reg_state { * Used to determine if any memory access using this register will * result in a bad access. */ - u64 min_value, max_value; + s64 min_value; + u64 max_value; union { /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ s64 imm; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 99a7e5b388f2..6a936159c6e0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -216,8 +216,8 @@ static void print_verifier_state(struct bpf_verifier_state *state) reg->map_ptr->key_size, reg->map_ptr->value_size); if (reg->min_value != BPF_REGISTER_MIN_RANGE) - verbose(",min_value=%llu", - (unsigned long long)reg->min_value); + verbose(",min_value=%lld", + (long long)reg->min_value); if (reg->max_value != BPF_REGISTER_MAX_RANGE) verbose(",max_value=%llu", (unsigned long long)reg->max_value); @@ -758,7 +758,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, * index'es we need to make sure that whatever we use * will have a set floor within our range. */ - if ((s64)reg->min_value < 0) { + if (reg->min_value < 0) { verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno); return -EACCES; @@ -1468,7 +1468,8 @@ static void check_reg_overflow(struct bpf_reg_state *reg) { if (reg->max_value > BPF_REGISTER_MAX_RANGE) reg->max_value = BPF_REGISTER_MAX_RANGE; - if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE) + if (reg->min_value < BPF_REGISTER_MIN_RANGE || + reg->min_value > BPF_REGISTER_MAX_RANGE) reg->min_value = BPF_REGISTER_MIN_RANGE; } @@ -1476,7 +1477,8 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; - u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE; + s64 min_val = BPF_REGISTER_MIN_RANGE; + u64 max_val = BPF_REGISTER_MAX_RANGE; bool min_set = false, max_set = false; u8 opcode = BPF_OP(insn->code); @@ -1512,22 +1514,43 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, return; } + /* If one of our values was at the end of our ranges then we can't just + * do our normal operations to the register, we need to set the values + * to the min/max since they are undefined. + */ + if (min_val == BPF_REGISTER_MIN_RANGE) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + if (max_val == BPF_REGISTER_MAX_RANGE) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + switch (opcode) { case BPF_ADD: - dst_reg->min_value += min_val; - dst_reg->max_value += max_val; + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value += min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value += max_val; break; case BPF_SUB: - dst_reg->min_value -= min_val; - dst_reg->max_value -= max_val; + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value -= min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value -= max_val; break; case BPF_MUL: - dst_reg->min_value *= min_val; - dst_reg->max_value *= max_val; + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value *= min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value *= max_val; break; case BPF_AND: - /* & is special since it could end up with 0 bits set. */ - dst_reg->min_value &= min_val; + /* Disallow AND'ing of negative numbers, ain't nobody got time + * for that. Otherwise the minimum is 0 and the max is the max + * value we could AND against. + */ + if (min_val < 0) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + else + dst_reg->min_value = 0; dst_reg->max_value = max_val; break; case BPF_LSH: @@ -1537,24 +1560,25 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, */ if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else + else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value <<= min_val; if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) dst_reg->max_value = BPF_REGISTER_MAX_RANGE; - else + else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value <<= max_val; break; case BPF_RSH: - dst_reg->min_value >>= min_val; - dst_reg->max_value >>= max_val; - break; - case BPF_MOD: - /* % is special since it is an unsigned modulus, so the floor - * will always be 0. + /* RSH by a negative number is undefined, and the BPF_RSH is an + * unsigned shift, so make the appropriate casts. */ - dst_reg->min_value = 0; - dst_reg->max_value = max_val - 1; + if (min_val < 0 || dst_reg->min_value < 0) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + else + dst_reg->min_value = + (u64)(dst_reg->min_value) >> min_val; + if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) + dst_reg->max_value >>= max_val; break; default: reset_reg_range_values(regs, insn->dst_reg); -- cgit v1.2.3-59-g8ed1b From 3b7093346b326e5d3590c7d49f6aefe6fa5b2c9a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Nov 2016 05:46:06 -0500 Subject: ipv4: Restore fib_trie_flush_external function and fix call ordering The patch that removed the FIB offload infrastructure was a bit too aggressive and also removed code needed to clean up us splitting the table if additional rules were added. Specifically the function fib_trie_flush_external was called at the end of a new rule being added to flush the foreign trie entries from the main trie. I updated the code so that we only call fib_trie_flush_external on the main table so that we flush the entries for local from main. This way we don't call it for every rule change which is what was happening previously. Fixes: 347e3b28c1ba2 ("switchdev: remove FIB offload infrastructure") Reported-by: Eric Dumazet Cc: Jiri Pirko Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 20 +++++++++++---- net/ipv4/fib_trie.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 5 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b9314b48e39f..f390c3bb05c5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -243,6 +243,7 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); +void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c3b80478226e..161fc0f0d752 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -151,7 +151,7 @@ static void fib_replace_table(struct net *net, struct fib_table *old, int fib_unmerge(struct net *net) { - struct fib_table *old, *new; + struct fib_table *old, *new, *main_table; /* attempt to fetch local table if it has been allocated */ old = fib_get_table(net, RT_TABLE_LOCAL); @@ -162,11 +162,21 @@ int fib_unmerge(struct net *net) if (!new) return -ENOMEM; + /* table is already unmerged */ + if (new == old) + return 0; + /* replace merged table with clean table */ - if (new != old) { - fib_replace_table(net, old, new); - fib_free_table(old); - } + fib_replace_table(net, old, new); + fib_free_table(old); + + /* attempt to fetch main table if it has been allocated */ + main_table = fib_get_table(net, RT_TABLE_MAIN); + if (!main_table) + return 0; + + /* flush local entries from main table */ + fib_table_flush_external(main_table); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4cff74d4133f..735edc9d41a2 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1760,6 +1760,71 @@ out: return NULL; } +/* Caller must hold RTNL */ +void fib_table_flush_external(struct fib_table *tb) +{ + struct trie *t = (struct trie *)tb->tb_data; + struct key_vector *pn = t->kv; + unsigned long cindex = 1; + struct hlist_node *tmp; + struct fib_alias *fa; + + /* walk trie in reverse order */ + for (;;) { + unsigned char slen = 0; + struct key_vector *n; + + if (!(cindex--)) { + t_key pkey = pn->key; + + /* cannot resize the trie vector */ + if (IS_TRIE(pn)) + break; + + /* resize completed node */ + pn = resize(t, pn); + cindex = get_index(pkey, pn); + + continue; + } + + /* grab the next available node */ + n = get_child(pn, cindex); + if (!n) + continue; + + if (IS_TNODE(n)) { + /* record pn and cindex for leaf walking */ + pn = n; + cindex = 1ul << n->bits; + + continue; + } + + hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { + /* if alias was cloned to local then we just + * need to remove the local copy from main + */ + if (tb->tb_id != fa->tb_id) { + hlist_del_rcu(&fa->fa_list); + alias_free_mem_rcu(fa); + continue; + } + + /* record local slen */ + slen = fa->fa_slen; + } + + /* update leaf slen */ + n->slen = slen; + + if (hlist_empty(&n->leaf)) { + put_child_root(pn, n->key, NULL); + node_free(n); + } + } +} + /* Caller must hold RTNL. */ int fib_table_flush(struct net *net, struct fib_table *tb) { -- cgit v1.2.3-59-g8ed1b From 3114cdfe66c156345b0ae34e2990472f277e0c1b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 15 Nov 2016 05:46:12 -0500 Subject: ipv4: Fix memory leak in exception case for splitting tries Fix a small memory leak that can occur where we leak a fib_alias in the event of us not being able to insert it into the local table. Fixes: 0ddcf43d5d4a0 ("ipv4: FIB Local/MAIN table collapse") Reported-by: Eric Dumazet Signed-off-by: Alexander Duyck Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 735edc9d41a2..026f309c51e9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1743,8 +1743,10 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) local_l = fib_find_node(lt, &local_tp, l->key); if (fib_insert_alias(lt, local_tp, local_l, new_fa, - NULL, l->key)) + NULL, l->key)) { + kmem_cache_free(fn_alias_kmem, new_fa); goto out; + } } /* stop loop if key wrapped back to 0 */ -- cgit v1.2.3-59-g8ed1b From 612e94bd99912f3b2ac616c00c3dc7f166a98005 Mon Sep 17 00:00:00 2001 From: Radha Mohan Chintakuntla Date: Tue, 15 Nov 2016 17:37:16 +0530 Subject: net: thunderx: Introduce BGX_ID_MASK macro to extract bgx_id This patch fixes the 'bgx_id' determination on 83xx where there are 4 BGX blocks instead of 2 on other platforms. Signed-off-by: Radha Mohan Chintakuntla Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 4 ++-- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 8bbaedbb7b94..050e21fbb147 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1242,8 +1242,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid); if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) { - bgx->bgx_id = - (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; + bgx->bgx_id = (pci_resource_start(pdev, + PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK; bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE; bgx->max_lmac = MAX_LMAC_PER_BGX; bgx_vnic[bgx->bgx_id] = bgx; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index d59c71e4a000..01cc7c859131 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -28,6 +28,8 @@ #define MAX_DMAC_PER_LMAC 8 #define MAX_FRAME_SIZE 9216 +#define BGX_ID_MASK 0x3 + #define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2 /* Registers */ -- cgit v1.2.3-59-g8ed1b From 712c3185344050c591d78584542bd945e4f6f778 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:37:36 +0530 Subject: net: thunderx: Program LMAC credits based on MTU Programming LMAC credits taking 9K frame size by default is incorrect as for an interface which is one of the many on the same BGX/QLM no of credits available will be less as Tx FIFO will be divided across all interfaces. So let's say a BGX with 40G interface and another BGX with multiple 10G, bandwidth of 10G interfaces will be effected when traffic is running on both 40G and 10G interfaces simultaneously. This patch fixes this issue by programming credits based on netdev's MTU. Also fixed configuring MTU to HW and added CQE counter for pkts which exceed this value. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 3 +- drivers/net/ethernet/cavium/thunder/nic_main.c | 36 +++++++++++++------- drivers/net/ethernet/cavium/thunder/nic_reg.h | 1 + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 38 ++++++++++++---------- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 3 ++ drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 2 ++ 6 files changed, 53 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 30426109711c..cd2d379df5c5 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -47,7 +47,7 @@ /* Min/Max packet size */ #define NIC_HW_MIN_FRS 64 -#define NIC_HW_MAX_FRS 9200 /* 9216 max packet including FCS */ +#define NIC_HW_MAX_FRS 9190 /* Excluding L2 header and FCS */ /* Max pkinds */ #define NIC_MAX_PKIND 16 @@ -282,7 +282,6 @@ struct nicvf { u8 node; u8 cpi_alg; - u16 mtu; bool link_up; u8 duplex; u32 speed; diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 2bbf4cbf08b2..85c9e6201e8b 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "nic_reg.h" #include "nic.h" @@ -260,18 +261,31 @@ static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx) /* Update hardware min/max frame size */ static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf) { - if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) { - dev_err(&nic->pdev->dev, - "Invalid MTU setting from VF%d rejected, should be between %d and %d\n", - vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS); + int bgx, lmac, lmac_cnt; + u64 lmac_credits; + + if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) return 1; - } - new_frs += ETH_HLEN; - if (new_frs <= nic->pkind.maxlen) - return 0; - nic->pkind.maxlen = new_frs; - nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind); + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac += bgx * MAX_LMAC_PER_BGX; + + new_frs += VLAN_ETH_HLEN + ETH_FCS_LEN + 4; + + /* Update corresponding LMAC credits */ + lmac_cnt = bgx_get_lmac_count(nic->node, bgx); + lmac_credits = nic_reg_read(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8)); + lmac_credits &= ~(0xFFFFFULL << 12); + lmac_credits |= (((((48 * 1024) / lmac_cnt) - new_frs) / 16) << 12); + nic_reg_write(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), lmac_credits); + + /* Enforce MTU in HW + * This config is supported only from 88xx pass 2.0 onwards. + */ + if (!pass1_silicon(nic->pdev)) + nic_reg_write(nic, + NIC_PF_LMAC_0_7_CFG2 + (lmac * 8), new_frs); return 0; } @@ -464,7 +478,7 @@ static int nic_init_hw(struct nicpf *nic) /* PKIND configuration */ nic->pkind.minlen = 0; - nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN; + nic->pkind.maxlen = NIC_HW_MAX_FRS + VLAN_ETH_HLEN + ETH_FCS_LEN + 4; nic->pkind.lenerr_en = 1; nic->pkind.rx_hdr = 0; nic->pkind.hdr_sl = 0; diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index edf779f5a227..80d46337cf29 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -106,6 +106,7 @@ #define NIC_PF_MPI_0_2047_CFG (0x210000) #define NIC_PF_RSSI_0_4097_RQ (0x220000) #define NIC_PF_LMAC_0_7_CFG (0x240000) +#define NIC_PF_LMAC_0_7_CFG2 (0x240100) #define NIC_PF_LMAC_0_7_SW_XOFF (0x242000) #define NIC_PF_LMAC_0_7_CREDIT (0x244000) #define NIC_PF_CHAN_0_255_TX_CFG (0x400000) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 45a13f718863..8f833612da77 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1189,6 +1189,17 @@ int nicvf_stop(struct net_device *netdev) return 0; } +static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) +{ + union nic_mbx mbx = {}; + + mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS; + mbx.frs.max_frs = mtu; + mbx.frs.vf_id = nic->vf_id; + + return nicvf_send_msg_to_pf(nic, &mbx); +} + int nicvf_open(struct net_device *netdev) { int err, qidx; @@ -1196,8 +1207,6 @@ int nicvf_open(struct net_device *netdev) struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; - nic->mtu = netdev->mtu; - netif_carrier_off(netdev); err = nicvf_register_misc_interrupt(nic); @@ -1248,9 +1257,12 @@ int nicvf_open(struct net_device *netdev) if (nic->sqs_mode) nicvf_get_primary_vf_struct(nic); - /* Configure receive side scaling */ - if (!nic->sqs_mode) + /* Configure receive side scaling and MTU */ + if (!nic->sqs_mode) { nicvf_rss_init(nic); + if (nicvf_update_hw_max_frs(nic, netdev->mtu)) + goto cleanup; + } err = nicvf_register_interrupts(nic); if (err) @@ -1297,17 +1309,6 @@ napi_del: return err; } -static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) -{ - union nic_mbx mbx = {}; - - mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS; - mbx.frs.max_frs = mtu; - mbx.frs.vf_id = nic->vf_id; - - return nicvf_send_msg_to_pf(nic, &mbx); -} - static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) { struct nicvf *nic = netdev_priv(netdev); @@ -1318,10 +1319,13 @@ static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) if (new_mtu < NIC_HW_MIN_FRS) return -EINVAL; + netdev->mtu = new_mtu; + + if (!netif_running(netdev)) + return 0; + if (nicvf_update_hw_max_frs(nic, new_mtu)) return -EINVAL; - netdev->mtu = new_mtu; - nic->mtu = new_mtu; return 0; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index a4fc50155881..f0e0ca61438e 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1530,6 +1530,9 @@ int nicvf_check_cqe_tx_errs(struct nicvf *nic, case CQ_TX_ERROP_SUBDC_ERR: stats->tx.subdesc_err++; break; + case CQ_TX_ERROP_MAX_SIZE_VIOL: + stats->tx.max_size_exceeded++; + break; case CQ_TX_ERROP_IMM_SIZE_OFLOW: stats->tx.imm_size_oflow++; break; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 869f3386028b..8f4718edc0fe 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -158,6 +158,7 @@ enum CQ_TX_ERROP_E { CQ_TX_ERROP_DESC_FAULT = 0x10, CQ_TX_ERROP_HDR_CONS_ERR = 0x11, CQ_TX_ERROP_SUBDC_ERR = 0x12, + CQ_TX_ERROP_MAX_SIZE_VIOL = 0x13, CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80, CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81, CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82, @@ -177,6 +178,7 @@ struct cmp_queue_stats { u64 desc_fault; u64 hdr_cons_err; u64 subdesc_err; + u64 max_size_exceeded; u64 imm_size_oflow; u64 data_seq_err; u64 mem_seq_err; -- cgit v1.2.3-59-g8ed1b From cadcf95a4f70362c96a8fe39ff5d5df830d4db7f Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:37:54 +0530 Subject: net: thunderx: Fix configuration of L3/L4 length checking This patch fixes enabling of HW verification of L3/L4 length and TCP/UDP checksum which is currently being cleared. Also fixed VLAN stripping config which is being cleared when multiqset is enabled. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f0e0ca61438e..f914eef6573a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -538,9 +538,12 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8); nicvf_send_msg_to_pf(nic, &mbx); - nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00); - if (!nic->sqs_mode) + if (!nic->sqs_mode && (qidx == 0)) { + /* Enable checking L3/L4 length and TCP/UDP checksums */ + nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, + (BIT(24) | BIT(23) | BIT(21))); nicvf_config_vlan_stripping(nic, nic->netdev->features); + } /* Enable Receive queue */ memset(&rq_cfg, 0, sizeof(struct rq_cfg)); -- cgit v1.2.3-59-g8ed1b From 964cb69bdc9db255f7c3a80f6e1bed8a25e4c60e Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:38:16 +0530 Subject: net: thunderx: Fix VF driver's interface statistics This patch fixes multiple issues 1. Convert all driver statistics to percpu counters for accuracy. 2. To avoid multiple CQEs posted by a TSO packet appended to HW, TSO pkt's SQE has 'post_cqe' not set but a dummy SQE is added for getting HW transmit completion notification. This dummy SQE has 'dont_send' set and HW drops the pkt pointed to in this thus Tx drop counter increases. This patch fixes this by subtracting SW tx tso counter from HW Tx drop counter for actual packet drop counter. 3. Reset all individual queue's and VNIC HW stats when interface is going down. 4. Getrid off unnecessary counters in hot path. 5. Bringout all CQE error stats i.e both Rx and Tx. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 61 +++++++----- drivers/net/ethernet/cavium/thunder/nic_main.c | 1 + .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 105 +++++++++++--------- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 106 +++++++++++---------- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 96 +++++++++---------- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 24 +---- 6 files changed, 197 insertions(+), 196 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index cd2d379df5c5..86bd93ce2ea3 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -178,11 +178,11 @@ enum tx_stats_reg_offset { struct nicvf_hw_stats { u64 rx_bytes; + u64 rx_frames; u64 rx_ucast_frames; u64 rx_bcast_frames; u64 rx_mcast_frames; - u64 rx_fcs_errors; - u64 rx_l2_errors; + u64 rx_drops; u64 rx_drop_red; u64 rx_drop_red_bytes; u64 rx_drop_overrun; @@ -191,6 +191,19 @@ struct nicvf_hw_stats { u64 rx_drop_mcast; u64 rx_drop_l3_bcast; u64 rx_drop_l3_mcast; + u64 rx_fcs_errors; + u64 rx_l2_errors; + + u64 tx_bytes; + u64 tx_frames; + u64 tx_ucast_frames; + u64 tx_bcast_frames; + u64 tx_mcast_frames; + u64 tx_drops; +}; + +struct nicvf_drv_stats { + /* CQE Rx errs */ u64 rx_bgx_truncated_pkts; u64 rx_jabber_errs; u64 rx_fcs_errs; @@ -216,34 +229,30 @@ struct nicvf_hw_stats { u64 rx_l4_pclp; u64 rx_truncated_pkts; - u64 tx_bytes_ok; - u64 tx_ucast_frames_ok; - u64 tx_bcast_frames_ok; - u64 tx_mcast_frames_ok; - u64 tx_drops; -}; - -struct nicvf_drv_stats { - /* Rx */ - u64 rx_frames_ok; - u64 rx_frames_64; - u64 rx_frames_127; - u64 rx_frames_255; - u64 rx_frames_511; - u64 rx_frames_1023; - u64 rx_frames_1518; - u64 rx_frames_jumbo; - u64 rx_drops; - + /* CQE Tx errs */ + u64 tx_desc_fault; + u64 tx_hdr_cons_err; + u64 tx_subdesc_err; + u64 tx_max_size_exceeded; + u64 tx_imm_size_oflow; + u64 tx_data_seq_err; + u64 tx_mem_seq_err; + u64 tx_lock_viol; + u64 tx_data_fault; + u64 tx_tstmp_conflict; + u64 tx_tstmp_timeout; + u64 tx_mem_fault; + u64 tx_csum_overlap; + u64 tx_csum_overflow; + + /* driver debug stats */ u64 rcv_buffer_alloc_failures; - - /* Tx */ - u64 tx_frames_ok; - u64 tx_drops; u64 tx_tso; u64 tx_timeout; u64 txq_stop; u64 txq_wake; + + struct u64_stats_sync syncp; }; struct nicvf { @@ -297,7 +306,7 @@ struct nicvf { /* Stats */ struct nicvf_hw_stats hw_stats; - struct nicvf_drv_stats drv_stats; + struct nicvf_drv_stats __percpu *drv_stats; struct bgx_stats bgx_stats; /* MSI-X */ diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 85c9e6201e8b..6677b96e1f3f 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -851,6 +851,7 @@ static int nic_reset_stat_counters(struct nicpf *nic, nic_reg_write(nic, reg_addr, 0); } } + return 0; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index ad4fddb55421..432bf6be57cb 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -36,11 +36,11 @@ struct nicvf_stat { static const struct nicvf_stat nicvf_hw_stats[] = { NICVF_HW_STAT(rx_bytes), + NICVF_HW_STAT(rx_frames), NICVF_HW_STAT(rx_ucast_frames), NICVF_HW_STAT(rx_bcast_frames), NICVF_HW_STAT(rx_mcast_frames), - NICVF_HW_STAT(rx_fcs_errors), - NICVF_HW_STAT(rx_l2_errors), + NICVF_HW_STAT(rx_drops), NICVF_HW_STAT(rx_drop_red), NICVF_HW_STAT(rx_drop_red_bytes), NICVF_HW_STAT(rx_drop_overrun), @@ -49,50 +49,59 @@ static const struct nicvf_stat nicvf_hw_stats[] = { NICVF_HW_STAT(rx_drop_mcast), NICVF_HW_STAT(rx_drop_l3_bcast), NICVF_HW_STAT(rx_drop_l3_mcast), - NICVF_HW_STAT(rx_bgx_truncated_pkts), - NICVF_HW_STAT(rx_jabber_errs), - NICVF_HW_STAT(rx_fcs_errs), - NICVF_HW_STAT(rx_bgx_errs), - NICVF_HW_STAT(rx_prel2_errs), - NICVF_HW_STAT(rx_l2_hdr_malformed), - NICVF_HW_STAT(rx_oversize), - NICVF_HW_STAT(rx_undersize), - NICVF_HW_STAT(rx_l2_len_mismatch), - NICVF_HW_STAT(rx_l2_pclp), - NICVF_HW_STAT(rx_ip_ver_errs), - NICVF_HW_STAT(rx_ip_csum_errs), - NICVF_HW_STAT(rx_ip_hdr_malformed), - NICVF_HW_STAT(rx_ip_payload_malformed), - NICVF_HW_STAT(rx_ip_ttl_errs), - NICVF_HW_STAT(rx_l3_pclp), - NICVF_HW_STAT(rx_l4_malformed), - NICVF_HW_STAT(rx_l4_csum_errs), - NICVF_HW_STAT(rx_udp_len_errs), - NICVF_HW_STAT(rx_l4_port_errs), - NICVF_HW_STAT(rx_tcp_flag_errs), - NICVF_HW_STAT(rx_tcp_offset_errs), - NICVF_HW_STAT(rx_l4_pclp), - NICVF_HW_STAT(rx_truncated_pkts), - NICVF_HW_STAT(tx_bytes_ok), - NICVF_HW_STAT(tx_ucast_frames_ok), - NICVF_HW_STAT(tx_bcast_frames_ok), - NICVF_HW_STAT(tx_mcast_frames_ok), + NICVF_HW_STAT(rx_fcs_errors), + NICVF_HW_STAT(rx_l2_errors), + NICVF_HW_STAT(tx_bytes), + NICVF_HW_STAT(tx_frames), + NICVF_HW_STAT(tx_ucast_frames), + NICVF_HW_STAT(tx_bcast_frames), + NICVF_HW_STAT(tx_mcast_frames), + NICVF_HW_STAT(tx_drops), }; static const struct nicvf_stat nicvf_drv_stats[] = { - NICVF_DRV_STAT(rx_frames_ok), - NICVF_DRV_STAT(rx_frames_64), - NICVF_DRV_STAT(rx_frames_127), - NICVF_DRV_STAT(rx_frames_255), - NICVF_DRV_STAT(rx_frames_511), - NICVF_DRV_STAT(rx_frames_1023), - NICVF_DRV_STAT(rx_frames_1518), - NICVF_DRV_STAT(rx_frames_jumbo), - NICVF_DRV_STAT(rx_drops), + NICVF_DRV_STAT(rx_bgx_truncated_pkts), + NICVF_DRV_STAT(rx_jabber_errs), + NICVF_DRV_STAT(rx_fcs_errs), + NICVF_DRV_STAT(rx_bgx_errs), + NICVF_DRV_STAT(rx_prel2_errs), + NICVF_DRV_STAT(rx_l2_hdr_malformed), + NICVF_DRV_STAT(rx_oversize), + NICVF_DRV_STAT(rx_undersize), + NICVF_DRV_STAT(rx_l2_len_mismatch), + NICVF_DRV_STAT(rx_l2_pclp), + NICVF_DRV_STAT(rx_ip_ver_errs), + NICVF_DRV_STAT(rx_ip_csum_errs), + NICVF_DRV_STAT(rx_ip_hdr_malformed), + NICVF_DRV_STAT(rx_ip_payload_malformed), + NICVF_DRV_STAT(rx_ip_ttl_errs), + NICVF_DRV_STAT(rx_l3_pclp), + NICVF_DRV_STAT(rx_l4_malformed), + NICVF_DRV_STAT(rx_l4_csum_errs), + NICVF_DRV_STAT(rx_udp_len_errs), + NICVF_DRV_STAT(rx_l4_port_errs), + NICVF_DRV_STAT(rx_tcp_flag_errs), + NICVF_DRV_STAT(rx_tcp_offset_errs), + NICVF_DRV_STAT(rx_l4_pclp), + NICVF_DRV_STAT(rx_truncated_pkts), + + NICVF_DRV_STAT(tx_desc_fault), + NICVF_DRV_STAT(tx_hdr_cons_err), + NICVF_DRV_STAT(tx_subdesc_err), + NICVF_DRV_STAT(tx_max_size_exceeded), + NICVF_DRV_STAT(tx_imm_size_oflow), + NICVF_DRV_STAT(tx_data_seq_err), + NICVF_DRV_STAT(tx_mem_seq_err), + NICVF_DRV_STAT(tx_lock_viol), + NICVF_DRV_STAT(tx_data_fault), + NICVF_DRV_STAT(tx_tstmp_conflict), + NICVF_DRV_STAT(tx_tstmp_timeout), + NICVF_DRV_STAT(tx_mem_fault), + NICVF_DRV_STAT(tx_csum_overlap), + NICVF_DRV_STAT(tx_csum_overflow), + NICVF_DRV_STAT(rcv_buffer_alloc_failures), - NICVF_DRV_STAT(tx_frames_ok), NICVF_DRV_STAT(tx_tso), - NICVF_DRV_STAT(tx_drops), NICVF_DRV_STAT(tx_timeout), NICVF_DRV_STAT(txq_stop), NICVF_DRV_STAT(txq_wake), @@ -278,8 +287,8 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct nicvf *nic = netdev_priv(netdev); - int stat; - int sqs; + int stat, tmp_stats; + int sqs, cpu; nicvf_update_stats(nic); @@ -289,9 +298,13 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev, for (stat = 0; stat < nicvf_n_hw_stats; stat++) *(data++) = ((u64 *)&nic->hw_stats) [nicvf_hw_stats[stat].index]; - for (stat = 0; stat < nicvf_n_drv_stats; stat++) - *(data++) = ((u64 *)&nic->drv_stats) - [nicvf_drv_stats[stat].index]; + for (stat = 0; stat < nicvf_n_drv_stats; stat++) { + tmp_stats = 0; + for_each_possible_cpu(cpu) + tmp_stats += ((u64 *)per_cpu_ptr(nic->drv_stats, cpu)) + [nicvf_drv_stats[stat].index]; + *(data++) = tmp_stats; + } nicvf_get_qset_stats(nic, stats, &data); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 8f833612da77..9dc79c0578d8 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -69,25 +69,6 @@ static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx) return qidx; } -static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic, - struct sk_buff *skb) -{ - if (skb->len <= 64) - nic->drv_stats.rx_frames_64++; - else if (skb->len <= 127) - nic->drv_stats.rx_frames_127++; - else if (skb->len <= 255) - nic->drv_stats.rx_frames_255++; - else if (skb->len <= 511) - nic->drv_stats.rx_frames_511++; - else if (skb->len <= 1023) - nic->drv_stats.rx_frames_1023++; - else if (skb->len <= 1518) - nic->drv_stats.rx_frames_1518++; - else - nic->drv_stats.rx_frames_jumbo++; -} - /* The Cavium ThunderX network controller can *only* be found in SoCs * containing the ThunderX ARM64 CPU implementation. All accesses to the device * registers on this platform are implicitly strongly ordered with respect @@ -514,7 +495,6 @@ static int nicvf_init_resources(struct nicvf *nic) } static void nicvf_snd_pkt_handler(struct net_device *netdev, - struct cmp_queue *cq, struct cqe_send_t *cqe_tx, int cqe_type, int budget, unsigned int *tx_pkts, unsigned int *tx_bytes) @@ -536,7 +516,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, cqe_tx->sqe_ptr, hdr->subdesc_cnt); - nicvf_check_cqe_tx_errs(nic, cq, cqe_tx); + nicvf_check_cqe_tx_errs(nic, cqe_tx); skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; if (skb) { /* Check for dummy descriptor used for HW TSO offload on 88xx */ @@ -630,8 +610,6 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, return; } - nicvf_set_rx_frame_cnt(nic, skb); - nicvf_set_rxhash(netdev, cqe_rx, skb); skb_record_rx_queue(skb, rq_idx); @@ -703,7 +681,7 @@ loop: work_done++; break; case CQE_TYPE_SEND: - nicvf_snd_pkt_handler(netdev, cq, + nicvf_snd_pkt_handler(netdev, (void *)cq_desc, CQE_TYPE_SEND, budget, &tx_pkts, &tx_bytes); tx_done++; @@ -740,7 +718,7 @@ done: nic = nic->pnicvf; if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { netif_tx_start_queue(txq); - nic->drv_stats.txq_wake++; + this_cpu_inc(nic->drv_stats->txq_wake); if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit queue wakeup SQ%d\n", @@ -1084,7 +1062,7 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) { netif_tx_stop_queue(txq); - nic->drv_stats.txq_stop++; + this_cpu_inc(nic->drv_stats->txq_stop); if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit ring full, stopping SQ%d\n", @@ -1202,7 +1180,7 @@ static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) int nicvf_open(struct net_device *netdev) { - int err, qidx; + int cpu, err, qidx; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; @@ -1262,6 +1240,11 @@ int nicvf_open(struct net_device *netdev) nicvf_rss_init(nic); if (nicvf_update_hw_max_frs(nic, netdev->mtu)) goto cleanup; + + /* Clear percpu stats */ + for_each_possible_cpu(cpu) + memset(per_cpu_ptr(nic->drv_stats, cpu), 0, + sizeof(struct nicvf_drv_stats)); } err = nicvf_register_interrupts(nic); @@ -1288,9 +1271,6 @@ int nicvf_open(struct net_device *netdev) for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); - nic->drv_stats.txq_stop = 0; - nic->drv_stats.txq_wake = 0; - return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); @@ -1383,9 +1363,10 @@ void nicvf_update_lmac_stats(struct nicvf *nic) void nicvf_update_stats(struct nicvf *nic) { - int qidx; + int qidx, cpu; + u64 tmp_stats = 0; struct nicvf_hw_stats *stats = &nic->hw_stats; - struct nicvf_drv_stats *drv_stats = &nic->drv_stats; + struct nicvf_drv_stats *drv_stats; struct queue_set *qs = nic->qs; #define GET_RX_STATS(reg) \ @@ -1408,21 +1389,33 @@ void nicvf_update_stats(struct nicvf *nic) stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST); stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST); - stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS); - stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST); - stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST); - stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST); + stats->tx_bytes = GET_TX_STATS(TX_OCTS); + stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST); + stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST); + stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST); stats->tx_drops = GET_TX_STATS(TX_DROP); - drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok + - stats->tx_bcast_frames_ok + - stats->tx_mcast_frames_ok; - drv_stats->rx_frames_ok = stats->rx_ucast_frames + - stats->rx_bcast_frames + - stats->rx_mcast_frames; - drv_stats->rx_drops = stats->rx_drop_red + - stats->rx_drop_overrun; - drv_stats->tx_drops = stats->tx_drops; + /* On T88 pass 2.0, the dummy SQE added for TSO notification + * via CQE has 'dont_send' set. Hence HW drops the pkt pointed + * pointed by dummy SQE and results in tx_drops counter being + * incremented. Subtracting it from tx_tso counter will give + * exact tx_drops counter. + */ + if (nic->t88 && nic->hw_tso) { + for_each_possible_cpu(cpu) { + drv_stats = per_cpu_ptr(nic->drv_stats, cpu); + tmp_stats += drv_stats->tx_tso; + } + stats->tx_drops = tmp_stats - stats->tx_drops; + } + stats->tx_frames = stats->tx_ucast_frames + + stats->tx_bcast_frames + + stats->tx_mcast_frames; + stats->rx_frames = stats->rx_ucast_frames + + stats->rx_bcast_frames + + stats->rx_mcast_frames; + stats->rx_drops = stats->rx_drop_red + + stats->rx_drop_overrun; /* Update RQ and SQ stats */ for (qidx = 0; qidx < qs->rq_cnt; qidx++) @@ -1436,18 +1429,17 @@ static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev, { struct nicvf *nic = netdev_priv(netdev); struct nicvf_hw_stats *hw_stats = &nic->hw_stats; - struct nicvf_drv_stats *drv_stats = &nic->drv_stats; nicvf_update_stats(nic); stats->rx_bytes = hw_stats->rx_bytes; - stats->rx_packets = drv_stats->rx_frames_ok; - stats->rx_dropped = drv_stats->rx_drops; + stats->rx_packets = hw_stats->rx_frames; + stats->rx_dropped = hw_stats->rx_drops; stats->multicast = hw_stats->rx_mcast_frames; - stats->tx_bytes = hw_stats->tx_bytes_ok; - stats->tx_packets = drv_stats->tx_frames_ok; - stats->tx_dropped = drv_stats->tx_drops; + stats->tx_bytes = hw_stats->tx_bytes; + stats->tx_packets = hw_stats->tx_frames; + stats->tx_dropped = hw_stats->tx_drops; return stats; } @@ -1460,7 +1452,7 @@ static void nicvf_tx_timeout(struct net_device *dev) netdev_warn(dev, "%s: Transmit timed out, resetting\n", dev->name); - nic->drv_stats.tx_timeout++; + this_cpu_inc(nic->drv_stats->tx_timeout); schedule_work(&nic->reset_task); } @@ -1594,6 +1586,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free_netdev; } + nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats); + if (!nic->drv_stats) { + err = -ENOMEM; + goto err_free_netdev; + } + err = nicvf_set_qset_resources(nic); if (err) goto err_free_netdev; @@ -1652,6 +1650,8 @@ err_unregister_interrupts: nicvf_unregister_interrupts(nic); err_free_netdev: pci_set_drvdata(pdev, NULL); + if (nic->drv_stats) + free_percpu(nic->drv_stats); free_netdev(netdev); err_release_regions: pci_release_regions(pdev); @@ -1679,6 +1679,8 @@ static void nicvf_remove(struct pci_dev *pdev) unregister_netdev(pnetdev); nicvf_unregister_interrupts(nic); pci_set_drvdata(pdev, NULL); + if (nic->drv_stats) + free_percpu(nic->drv_stats); free_netdev(netdev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index f914eef6573a..bdce5915baae 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -104,7 +104,8 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, order); if (!nic->rb_page) { - nic->drv_stats.rcv_buffer_alloc_failures++; + this_cpu_inc(nic->pnicvf->drv_stats-> + rcv_buffer_alloc_failures); return -ENOMEM; } nic->rb_page_offset = 0; @@ -483,9 +484,12 @@ static void nicvf_reset_rcv_queue_stats(struct nicvf *nic) { union nic_mbx mbx = {}; - /* Reset all RXQ's stats */ + /* Reset all RQ/SQ and VF stats */ mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER; + mbx.reset_stat.rx_stat_mask = 0x3FFF; + mbx.reset_stat.tx_stat_mask = 0x1F; mbx.reset_stat.rq_stat_mask = 0xFFFF; + mbx.reset_stat.sq_stat_mask = 0xFFFF; nicvf_send_msg_to_pf(nic, &mbx); } @@ -1032,7 +1036,7 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry, hdr->tso_max_paysize = skb_shinfo(skb)->gso_size; /* For non-tunneled pkts, point this to L2 ethertype */ hdr->inner_l3_offset = skb_network_offset(skb) - 2; - nic->drv_stats.tx_tso++; + this_cpu_inc(nic->pnicvf->drv_stats->tx_tso); } } @@ -1164,7 +1168,7 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt); - nic->drv_stats.tx_tso++; + this_cpu_inc(nic->pnicvf->drv_stats->tx_tso); return 1; } @@ -1425,8 +1429,6 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) /* Check for errors in the receive cmp.queue entry */ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { - struct nicvf_hw_stats *stats = &nic->hw_stats; - if (!cqe_rx->err_level && !cqe_rx->err_opcode) return 0; @@ -1438,76 +1440,76 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) switch (cqe_rx->err_opcode) { case CQ_RX_ERROP_RE_PARTIAL: - stats->rx_bgx_truncated_pkts++; + this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts); break; case CQ_RX_ERROP_RE_JABBER: - stats->rx_jabber_errs++; + this_cpu_inc(nic->drv_stats->rx_jabber_errs); break; case CQ_RX_ERROP_RE_FCS: - stats->rx_fcs_errs++; + this_cpu_inc(nic->drv_stats->rx_fcs_errs); break; case CQ_RX_ERROP_RE_RX_CTL: - stats->rx_bgx_errs++; + this_cpu_inc(nic->drv_stats->rx_bgx_errs); break; case CQ_RX_ERROP_PREL2_ERR: - stats->rx_prel2_errs++; + this_cpu_inc(nic->drv_stats->rx_prel2_errs); break; case CQ_RX_ERROP_L2_MAL: - stats->rx_l2_hdr_malformed++; + this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed); break; case CQ_RX_ERROP_L2_OVERSIZE: - stats->rx_oversize++; + this_cpu_inc(nic->drv_stats->rx_oversize); break; case CQ_RX_ERROP_L2_UNDERSIZE: - stats->rx_undersize++; + this_cpu_inc(nic->drv_stats->rx_undersize); break; case CQ_RX_ERROP_L2_LENMISM: - stats->rx_l2_len_mismatch++; + this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch); break; case CQ_RX_ERROP_L2_PCLP: - stats->rx_l2_pclp++; + this_cpu_inc(nic->drv_stats->rx_l2_pclp); break; case CQ_RX_ERROP_IP_NOT: - stats->rx_ip_ver_errs++; + this_cpu_inc(nic->drv_stats->rx_ip_ver_errs); break; case CQ_RX_ERROP_IP_CSUM_ERR: - stats->rx_ip_csum_errs++; + this_cpu_inc(nic->drv_stats->rx_ip_csum_errs); break; case CQ_RX_ERROP_IP_MAL: - stats->rx_ip_hdr_malformed++; + this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed); break; case CQ_RX_ERROP_IP_MALD: - stats->rx_ip_payload_malformed++; + this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed); break; case CQ_RX_ERROP_IP_HOP: - stats->rx_ip_ttl_errs++; + this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs); break; case CQ_RX_ERROP_L3_PCLP: - stats->rx_l3_pclp++; + this_cpu_inc(nic->drv_stats->rx_l3_pclp); break; case CQ_RX_ERROP_L4_MAL: - stats->rx_l4_malformed++; + this_cpu_inc(nic->drv_stats->rx_l4_malformed); break; case CQ_RX_ERROP_L4_CHK: - stats->rx_l4_csum_errs++; + this_cpu_inc(nic->drv_stats->rx_l4_csum_errs); break; case CQ_RX_ERROP_UDP_LEN: - stats->rx_udp_len_errs++; + this_cpu_inc(nic->drv_stats->rx_udp_len_errs); break; case CQ_RX_ERROP_L4_PORT: - stats->rx_l4_port_errs++; + this_cpu_inc(nic->drv_stats->rx_l4_port_errs); break; case CQ_RX_ERROP_TCP_FLAG: - stats->rx_tcp_flag_errs++; + this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs); break; case CQ_RX_ERROP_TCP_OFFSET: - stats->rx_tcp_offset_errs++; + this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs); break; case CQ_RX_ERROP_L4_PCLP: - stats->rx_l4_pclp++; + this_cpu_inc(nic->drv_stats->rx_l4_pclp); break; case CQ_RX_ERROP_RBDR_TRUNC: - stats->rx_truncated_pkts++; + this_cpu_inc(nic->drv_stats->rx_truncated_pkts); break; } @@ -1515,56 +1517,52 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) } /* Check for errors in the send cmp.queue entry */ -int nicvf_check_cqe_tx_errs(struct nicvf *nic, - struct cmp_queue *cq, struct cqe_send_t *cqe_tx) +int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx) { - struct cmp_queue_stats *stats = &cq->stats; - switch (cqe_tx->send_status) { case CQ_TX_ERROP_GOOD: - stats->tx.good++; return 0; case CQ_TX_ERROP_DESC_FAULT: - stats->tx.desc_fault++; + this_cpu_inc(nic->drv_stats->tx_desc_fault); break; case CQ_TX_ERROP_HDR_CONS_ERR: - stats->tx.hdr_cons_err++; + this_cpu_inc(nic->drv_stats->tx_hdr_cons_err); break; case CQ_TX_ERROP_SUBDC_ERR: - stats->tx.subdesc_err++; + this_cpu_inc(nic->drv_stats->tx_subdesc_err); break; case CQ_TX_ERROP_MAX_SIZE_VIOL: - stats->tx.max_size_exceeded++; + this_cpu_inc(nic->drv_stats->tx_max_size_exceeded); break; case CQ_TX_ERROP_IMM_SIZE_OFLOW: - stats->tx.imm_size_oflow++; + this_cpu_inc(nic->drv_stats->tx_imm_size_oflow); break; case CQ_TX_ERROP_DATA_SEQUENCE_ERR: - stats->tx.data_seq_err++; + this_cpu_inc(nic->drv_stats->tx_data_seq_err); break; case CQ_TX_ERROP_MEM_SEQUENCE_ERR: - stats->tx.mem_seq_err++; + this_cpu_inc(nic->drv_stats->tx_mem_seq_err); break; case CQ_TX_ERROP_LOCK_VIOL: - stats->tx.lock_viol++; + this_cpu_inc(nic->drv_stats->tx_lock_viol); break; case CQ_TX_ERROP_DATA_FAULT: - stats->tx.data_fault++; + this_cpu_inc(nic->drv_stats->tx_data_fault); break; case CQ_TX_ERROP_TSTMP_CONFLICT: - stats->tx.tstmp_conflict++; + this_cpu_inc(nic->drv_stats->tx_tstmp_conflict); break; case CQ_TX_ERROP_TSTMP_TIMEOUT: - stats->tx.tstmp_timeout++; + this_cpu_inc(nic->drv_stats->tx_tstmp_timeout); break; case CQ_TX_ERROP_MEM_FAULT: - stats->tx.mem_fault++; + this_cpu_inc(nic->drv_stats->tx_mem_fault); break; case CQ_TX_ERROP_CK_OVERLAP: - stats->tx.csum_overlap++; + this_cpu_inc(nic->drv_stats->tx_csum_overlap); break; case CQ_TX_ERROP_CK_OFLOW: - stats->tx.csum_overflow++; + this_cpu_inc(nic->drv_stats->tx_csum_overflow); break; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 8f4718edc0fe..2e3c940c1093 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -172,26 +172,6 @@ enum CQ_TX_ERROP_E { CQ_TX_ERROP_ENUM_LAST = 0x8a, }; -struct cmp_queue_stats { - struct tx_stats { - u64 good; - u64 desc_fault; - u64 hdr_cons_err; - u64 subdesc_err; - u64 max_size_exceeded; - u64 imm_size_oflow; - u64 data_seq_err; - u64 mem_seq_err; - u64 lock_viol; - u64 data_fault; - u64 tstmp_conflict; - u64 tstmp_timeout; - u64 mem_fault; - u64 csum_overlap; - u64 csum_overflow; - } tx; -} ____cacheline_aligned_in_smp; - enum RQ_SQ_STATS { RQ_SQ_STATS_OCTS, RQ_SQ_STATS_PKTS, @@ -243,7 +223,6 @@ struct cmp_queue { spinlock_t lock; /* lock to serialize processing CQEs */ void *desc; struct q_desc_mem dmem; - struct cmp_queue_stats stats; int irq; } ____cacheline_aligned_in_smp; @@ -338,6 +317,5 @@ u64 nicvf_queue_reg_read(struct nicvf *nic, void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx); void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx); int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx); -int nicvf_check_cqe_tx_errs(struct nicvf *nic, - struct cmp_queue *cq, struct cqe_send_t *cqe_tx); +int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx); #endif /* NICVF_QUEUES_H */ -- cgit v1.2.3-59-g8ed1b From c94acf805d93e7beb5898ac97ff327ae0b6f04dd Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 15 Nov 2016 17:38:29 +0530 Subject: net: thunderx: Fix memory leak and other issues upon interface toggle This patch fixes the following 1. When interface is being teardown and queues are being cleaned up, free pending SKBs that are in SQ which are either not transmitted or freed as NAPI is disabled by that time. 2. While interface initialization, delay CFG_DONE notification till the end to avoid corner cases where TXQs are enabled but CQ interrupts are not which results blocking transmission and kicking off watchdog. 3. Check for IFF_UP while re-enabling RBDR interrupts from tasklet. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 11 +++++------ drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 14 +++++++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 9dc79c0578d8..8a37012c9c89 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -473,9 +473,6 @@ int nicvf_set_real_num_queues(struct net_device *netdev, static int nicvf_init_resources(struct nicvf *nic) { int err; - union nic_mbx mbx = {}; - - mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; /* Enable Qset */ nicvf_qset_config(nic, true); @@ -488,9 +485,6 @@ static int nicvf_init_resources(struct nicvf *nic) return err; } - /* Send VF config done msg to PF */ - nicvf_write_to_mbx(nic, &mbx); - return 0; } @@ -1184,6 +1178,7 @@ int nicvf_open(struct net_device *netdev) struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct nicvf_cq_poll *cq_poll = NULL; + union nic_mbx mbx = {}; netif_carrier_off(netdev); @@ -1271,6 +1266,10 @@ int nicvf_open(struct net_device *netdev) for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); + /* Send VF config done msg to PF */ + mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE; + nicvf_write_to_mbx(nic, &mbx); + return 0; cleanup: nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index bdce5915baae..747ef0882976 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -271,7 +271,8 @@ refill: rbdr_idx, new_rb); next_rbdr: /* Re-enable RBDR interrupts only if buffer allocation is success */ - if (!nic->rb_alloc_fail && rbdr->enable) + if (!nic->rb_alloc_fail && rbdr->enable && + netif_running(nic->pnicvf->netdev)) nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx); if (rbdr_idx) @@ -362,6 +363,8 @@ static int nicvf_init_snd_queue(struct nicvf *nic, static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) { + struct sk_buff *skb; + if (!sq) return; if (!sq->dmem.base) @@ -372,6 +375,15 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) sq->dmem.q_len * TSO_HEADER_SIZE, sq->tso_hdrs, sq->tso_hdrs_phys); + /* Free pending skbs in the queue */ + smp_rmb(); + while (sq->head != sq->tail) { + skb = (struct sk_buff *)sq->skbuff[sq->head]; + if (skb) + dev_kfree_skb_any(skb); + sq->head++; + sq->head &= (sq->dmem.q_len - 1); + } kfree(sq->skbuff); nicvf_free_q_desc_mem(nic, &sq->dmem); } -- cgit v1.2.3-59-g8ed1b From d48756228ee9161ac8836b346589a43fabdc9f3c Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 15 Nov 2016 15:56:26 -0500 Subject: nvme/pci: Don't free queues on error The nvme_remove function tears down all allocated resources in the correct order, so no need to free queues on error during initialization. This fixes possible use-after-free errors when queues are still associated with a blk-mq hctx. Reported-by: Scott Bauer Tested-by: Scott Bauer Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0248d0e21fee..5e52034ab010 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1242,20 +1242,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) result = nvme_enable_ctrl(&dev->ctrl, cap); if (result) - goto free_nvmeq; + return result; nvmeq->cq_vector = 0; result = queue_request_irq(nvmeq); if (result) { nvmeq->cq_vector = -1; - goto free_nvmeq; + return result; } return result; - - free_nvmeq: - nvme_free_queues(dev, 0); - return result; } static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) @@ -1317,10 +1313,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev) max = min(dev->max_qid, dev->queue_count - 1); for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); - if (ret) { - nvme_free_queues(dev, i); + if (ret) break; - } } /* @@ -1460,13 +1454,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; - goto free_queues; + return result; } return nvme_create_io_queues(dev); - - free_queues: - nvme_free_queues(dev, 1); - return result; } static void nvme_del_queue_end(struct request *req, int error) -- cgit v1.2.3-59-g8ed1b From f9c22ec6c1c511285dc539b83aabdabdb6baf245 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 15 Nov 2016 17:39:02 -0500 Subject: gpio: Remove GPIO_DEVRES option This option was added in 6a89a314ab107a12af08c71420c19a37a30fc2d3 to allow use of the devm_gpio_* functions without CONFIG_GPIOLIB. However, only a few months later in b69ac52449c658b7ac40034dc3c5f5f4a71a723d, CONFIG_GPIOLIB was added as a dependency, defeating the original purpose of this option. Instead of that patch, the original commit could have just been reverted (and in fact was partially so in 403c1d0be5ccbd750d25c59d8358843a81e52e3b). Further, since this option has a dependency on HAS_IOMEM, even though it does not require it, it causes build failures when !HAS_IOMEM (e.g. in a uml build). Fix that by completely removing the option, in essence completing the reversion of the original commit. Signed-off-by: Keno Fischer Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 4 ---- drivers/gpio/Makefile | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index d011cb89d25e..ed37e5908b91 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -22,10 +22,6 @@ menuconfig GPIOLIB if GPIOLIB -config GPIO_DEVRES - def_bool y - depends on HAS_IOMEM - config OF_GPIO def_bool y depends on OF diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index ab28a2daeacc..d074c2299393 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -2,7 +2,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG -obj-$(CONFIG_GPIO_DEVRES) += devres.o +obj-$(CONFIG_GPIOLIB) += devres.o obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIOLIB) += gpiolib-legacy.o obj-$(CONFIG_OF_GPIO) += gpiolib-of.o -- cgit v1.2.3-59-g8ed1b From 963abe5c8a0273a1cf5913556da1b1189de0e57a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Nov 2016 22:24:12 -0800 Subject: virtio-net: add a missing synchronize_net() It seems many drivers do not respect napi_hash_del() contract. When napi_hash_del() is used before netif_napi_del(), an RCU grace period is needed before freeing NAPI object. Fixes: 91815639d880 ("virtio-net: rx busy polling support") Signed-off-by: Eric Dumazet Cc: Jason Wang Cc: Michael S. Tsirkin Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fd8b1e62301f..7276d5a95bd0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1497,6 +1497,11 @@ static void virtnet_free_queues(struct virtnet_info *vi) netif_napi_del(&vi->rq[i].napi); } + /* We called napi_hash_del() before netif_napi_del(), + * we need to respect an RCU grace period before freeing vi->rq + */ + synchronize_net(); + kfree(vi->rq); kfree(vi->sq); } -- cgit v1.2.3-59-g8ed1b From d5a4b1a540b8a9a44888b383472a80b84765aaa0 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 16 Nov 2016 17:27:34 +0800 Subject: tools/power/acpi: Remove direct kernel source include reference Avoid breaking cross-compiled ACPI tools builds by rearranging the handling of kernel header files. This patch also contains OUTPUT/srctree cleanups in order to make above fix working for various build environments. Fixes: e323c02dee59 (ACPICA: MSVC9: Fix inclusion order issue) Reported-and-tested-by: Yisheng Xie Reported-by: Andy Shevchenko Signed-off-by: Lv Zheng [ rjw: Changelog ] Signed-off-by: Rafael J. Wysocki --- include/acpi/platform/aclinux.h | 3 +++ tools/power/acpi/Makefile.config | 23 +++++++++--------- tools/power/acpi/Makefile.rules | 40 +++++++++++++++++++++----------- tools/power/acpi/tools/acpidbg/Makefile | 4 +--- tools/power/acpi/tools/acpidbg/acpidbg.c | 8 ++++++- tools/power/acpi/tools/acpidump/Makefile | 12 +++++----- 6 files changed, 56 insertions(+), 34 deletions(-) diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index a5d98d171866..e861a24f06f2 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -191,6 +191,9 @@ #ifndef __init #define __init #endif +#ifndef __iomem +#define __iomem +#endif /* Host-dependent types and defines for user-space ACPICA */ diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index a538ff44b108..a1883bbb0144 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -8,18 +8,19 @@ # as published by the Free Software Foundation; version 2 # of the License. -include ../../../../scripts/Makefile.include - -OUTPUT=./ -ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifneq ($(OUTPUT),) -# check that the output directory actually exists -OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) -$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) +include $(srctree)/../../scripts/Makefile.include + +OUTPUT=$(srctree)/ +ifeq ("$(origin O)", "command line") + OUTPUT := $(O)/power/acpi/ endif +#$(info Determined 'OUTPUT' to be $(OUTPUT)) # --- CONFIGURATION BEGIN --- @@ -70,8 +71,8 @@ WARNINGS := -Wall WARNINGS += $(call cc-supports,-Wstrict-prototypes) WARNINGS += $(call cc-supports,-Wdeclaration-after-statement) -KERNEL_INCLUDE := ../../../include -ACPICA_INCLUDE := ../../../drivers/acpi/acpica +KERNEL_INCLUDE := $(OUTPUT)include +ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE) CFLAGS += $(WARNINGS) diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules index ec87a9e562c0..373738338f51 100644 --- a/tools/power/acpi/Makefile.rules +++ b/tools/power/acpi/Makefile.rules @@ -8,28 +8,42 @@ # as published by the Free Software Foundation; version 2 # of the License. -$(OUTPUT)$(TOOL): $(TOOL_OBJS) FORCE - $(ECHO) " LD " $@ - $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(TOOL_OBJS) -L$(OUTPUT) -o $@ +objdir := $(OUTPUT)tools/$(TOOL)/ +toolobjs := $(addprefix $(objdir),$(TOOL_OBJS)) +$(OUTPUT)$(TOOL): $(toolobjs) FORCE + $(ECHO) " LD " $(subst $(OUTPUT),,$@) + $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@ + $(ECHO) " STRIP " $(subst $(OUTPUT),,$@) $(QUIET) $(STRIPCMD) $@ -$(OUTPUT)%.o: %.c - $(ECHO) " CC " $@ +$(KERNEL_INCLUDE): + $(ECHO) " MKDIR " $(subst $(OUTPUT),,$@) + $(QUIET) mkdir -p $(KERNEL_INCLUDE) + $(ECHO) " CP " $(subst $(OUTPUT),,$@) + $(QUIET) cp -rf $(srctree)/../../../include/acpi $(KERNEL_INCLUDE)/ + +$(objdir)%.o: %.c $(KERNEL_INCLUDE) + $(ECHO) " CC " $(subst $(OUTPUT),,$@) $(QUIET) $(CC) -c $(CFLAGS) -o $@ $< all: $(OUTPUT)$(TOOL) clean: - -find $(OUTPUT) \( -not -type d \) \ - -and \( -name '*~' -o -name '*.[oas]' \) \ - -type f -print \ - | xargs rm -f - -rm -f $(OUTPUT)$(TOOL) + $(ECHO) " RMOBJ " $(subst $(OUTPUT),,$(objdir)) + $(QUIET) find $(objdir) \( -not -type d \)\ + -and \( -name '*~' -o -name '*.[oas]' \)\ + -type f -print | xargs rm -f + $(ECHO) " RM " $(TOOL) + $(QUIET) rm -f $(OUTPUT)$(TOOL) + $(ECHO) " RMINC " $(subst $(OUTPUT),,$(KERNEL_INCLUDE)) + $(QUIET) rm -rf $(KERNEL_INCLUDE) install-tools: - $(INSTALL) -d $(DESTDIR)${sbindir} - $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)${sbindir} + $(ECHO) " INST " $(TOOL) + $(QUIET) $(INSTALL) -d $(DESTDIR)$(sbindir) + $(QUIET) $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)$(sbindir) uninstall-tools: - - rm -f $(DESTDIR)${sbindir}/$(TOOL) + $(ECHO) " UNINST " $(TOOL) + $(QUIET) rm -f $(DESTDIR)$(sbindir)/$(TOOL) install: all install-tools $(EXTRA_INSTALL) uninstall: uninstall-tools $(EXTRA_UNINSTALL) diff --git a/tools/power/acpi/tools/acpidbg/Makefile b/tools/power/acpi/tools/acpidbg/Makefile index 352df4b41ae9..f2d06e773eb4 100644 --- a/tools/power/acpi/tools/acpidbg/Makefile +++ b/tools/power/acpi/tools/acpidbg/Makefile @@ -17,9 +17,7 @@ vpath %.c \ ../../os_specific/service_layers\ . CFLAGS += -DACPI_APPLICATION -DACPI_SINGLE_THREAD -DACPI_DEBUGGER\ - -I.\ - -I../../../../../drivers/acpi/acpica\ - -I../../../../../include + -I. LDFLAGS += -lpthread TOOL_OBJS = \ acpidbg.o diff --git a/tools/power/acpi/tools/acpidbg/acpidbg.c b/tools/power/acpi/tools/acpidbg/acpidbg.c index a88ac45b7756..4308362d7068 100644 --- a/tools/power/acpi/tools/acpidbg/acpidbg.c +++ b/tools/power/acpi/tools/acpidbg/acpidbg.c @@ -12,10 +12,16 @@ #include /* Headers not included by include/acpi/platform/aclinux.h */ +#include +#include +#include +#include +#include #include #include #include -#include +#include +#include "../../../../../include/linux/circ_buf.h" #define ACPI_AML_FILE "/sys/kernel/debug/acpi/acpidbg" #define ACPI_AML_SEC_TICK 1 diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile index 04b5db7c7c0b..f7c7af1f9258 100644 --- a/tools/power/acpi/tools/acpidump/Makefile +++ b/tools/power/acpi/tools/acpidump/Makefile @@ -19,9 +19,7 @@ vpath %.c \ ./\ ../../common\ ../../os_specific/service_layers -CFLAGS += -DACPI_DUMP_APP -I.\ - -I../../../../../drivers/acpi/acpica\ - -I../../../../../include +CFLAGS += -DACPI_DUMP_APP -I. TOOL_OBJS = \ apdump.o\ apfiles.o\ @@ -49,7 +47,9 @@ TOOL_OBJS = \ include ../../Makefile.rules -install-man: ../../man/acpidump.8 - $(INSTALL_DATA) -D $< $(DESTDIR)${mandir}/man8/acpidump.8 +install-man: $(srctree)/man/acpidump.8 + $(ECHO) " INST " acpidump.8 + $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/acpidump.8 uninstall-man: - - rm -f $(DESTDIR)${mandir}/man8/acpidump.8 + $(ECHO) " UNINST " acpidump.8 + $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/acpidump.8 -- cgit v1.2.3-59-g8ed1b From ea339343d64a14594d882ccb52e8619d42defe5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 06:12:42 -0800 Subject: be2net: do not call napi_hash_del() Calling napi_hash_del() before netif_napi_del() is dangerous if a synchronize_rcu() is not enforced before NAPI struct freeing. Lets leave this detail to core networking stack and feel more comfortable. Signed-off-by: Eric Dumazet Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index cece8a08edca..93aa2939142a 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2813,7 +2813,6 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) if (eqo->q.created) { be_eq_clean(eqo); be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ); - napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); free_cpumask_var(eqo->affinity_mask); } -- cgit v1.2.3-59-g8ed1b From 5f00a8d8a2c2fd99528ab1a3632f0e77f4d25202 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 06:19:02 -0800 Subject: cxgb4: do not call napi_hash_del() Calling napi_hash_del() before netif_napi_del() is dangerous if a synchronize_rcu() is not enforced before NAPI struct freeing. Lets leave this detail to core networking stack and feel more comfortable. Signed-off-by: Eric Dumazet Cc: Hariprasad S Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 1e74fd6085df..e19a0ca8e5dd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2951,7 +2951,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, rq->cntxt_id, fl_id, 0xffff); dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len, rq->desc, rq->phys_addr); - napi_hash_del(&rq->napi); netif_napi_del(&rq->napi); rq->netdev = NULL; rq->cntxt_id = rq->abs_id = 0; -- cgit v1.2.3-59-g8ed1b From 955e16026d08a601d02b961d13b6db9d6c13c8c9 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 16 Nov 2016 01:02:33 -0800 Subject: net/phy/vitesse: Configure RGMII skew on VSC8601, if needed With RGMII, we need a 1.5 to 2ns skew between clock and data lines. The VSC8601 can handle this internally. While the VSC8601 can set more fine-grained delays, the standard skew settings work out of the box. The same heuristic is used to determine when this skew should be enabled as in vsc824x_config_init(). Tested on custom board with AM3352 SOC and VSC801 PHY. Signed-off-by: Alexandru Gagniuc Signed-off-by: David S. Miller --- drivers/net/phy/vitesse.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 2e37eb337d48..24b4a09468dd 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -62,6 +62,10 @@ /* Vitesse Extended Page Access Register */ #define MII_VSC82X4_EXT_PAGE_ACCESS 0x1f +/* Vitesse VSC8601 Extended PHY Control Register 1 */ +#define MII_VSC8601_EPHY_CTL 0x17 +#define MII_VSC8601_EPHY_CTL_RGMII_SKEW (1 << 8) + #define PHY_ID_VSC8234 0x000fc620 #define PHY_ID_VSC8244 0x000fc6c0 #define PHY_ID_VSC8514 0x00070670 @@ -111,6 +115,34 @@ static int vsc824x_config_init(struct phy_device *phydev) return err; } +/* This adds a skew for both TX and RX clocks, so the skew should only be + * applied to "rgmii-id" interfaces. It may not work as expected + * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. */ +static int vsc8601_add_skew(struct phy_device *phydev) +{ + int ret; + + ret = phy_read(phydev, MII_VSC8601_EPHY_CTL); + if (ret < 0) + return ret; + + ret |= MII_VSC8601_EPHY_CTL_RGMII_SKEW; + return phy_write(phydev, MII_VSC8601_EPHY_CTL, ret); +} + +static int vsc8601_config_init(struct phy_device *phydev) +{ + int ret = 0; + + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) + ret = vsc8601_add_skew(phydev); + + if (ret < 0) + return ret; + + return genphy_config_init(phydev); +} + static int vsc824x_ack_interrupt(struct phy_device *phydev) { int err = 0; @@ -275,7 +307,7 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, - .config_init = &genphy_config_init, + .config_init = &vsc8601_config_init, .config_aneg = &genphy_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &vsc824x_ack_interrupt, -- cgit v1.2.3-59-g8ed1b From 2a3811068fbc6bf09bb09d166b65394b091c1085 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 16 Nov 2016 23:51:19 +0000 Subject: ARM: Fix XIP kernels Commit 7619751f8c90 ("ARM: 8595/2: apply more __ro_after_init") caused a regression with XIP kernels by moving the __ro_after_init data into the read-only section. With XIP kernels, the read-only section is located in read-only memory from the very beginning. Work around this by moving the __ro_after_init data back into the .data section, which will be in RAM, and hence will be writable. It should be noted that in doing so, this remains writable after init. Fixes: 7619751f8c90 ("ARM: 8595/2: apply more __ro_after_init") Reported-by: Andrea Merello Tested-by: Andrea Merello [ XIP stm32 ] Tested-by: Alexandre Torgue Signed-off-by: Russell King --- arch/arm/kernel/vmlinux-xip.lds.S | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 7fa487ef7e2f..37b2a11af345 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -3,6 +3,9 @@ * Written by Martin Mares */ +/* No __ro_after_init data in the .rodata section - which will always be ro */ +#define RO_AFTER_INIT_DATA + #include #include #include @@ -223,6 +226,8 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_end = .; + *(.data..ro_after_init) + NOSAVE_DATA CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) READ_MOSTLY_DATA(L1_CACHE_BYTES) -- cgit v1.2.3-59-g8ed1b From 5b810a242c28e1d8d64d718cebe75b79d86a0b2d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 27 Oct 2016 16:36:26 +0300 Subject: IB/uverbs: Fix leak of XRC target QPs The real QP is destroyed in case of the ref count reaches zero, but for XRC target QPs this call was missed and caused to QP leaks. Let's call to destroy for all flows. Fixes: 0e0ec7e0638e ('RDMA/core: Export ib_open_qp() to share XRC...') Signed-off-by: Tariq Toukan Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0012fa58c105..44b1104eb168 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, container_of(uobj, struct ib_uqp_object, uevent.uobject); idr_remove_uobj(&ib_uverbs_qp_idr, uobj); - if (qp != qp->real_qp) { - ib_close_qp(qp); - } else { + if (qp == qp->real_qp) ib_uverbs_detach_umcast(qp, uqp); - ib_destroy_qp(qp); - } + ib_destroy_qp(qp); ib_uverbs_release_uevent(file, &uqp->uevent); kfree(uqp); } -- cgit v1.2.3-59-g8ed1b From 9db0ff53cb9b43ed75bacd42a89c1a0ab048b2b0 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:27 +0300 Subject: IB/cm: Mark stale CM id's whenever the mad agent was unregistered When there is a CM id object that has port assigned to it, it means that the cm-id asked for the specific port that it should go by it, but if that port was removed (hot-unplug event) the cm-id was not updated. In order to fix that the port keeps a list of all the cm-id's that are planning to go by it, whenever the port is removed it marks all of them as invalid. This commit fixes a kernel panic which happens when running traffic between guests and we force reboot a guest mid traffic, it triggers a kernel panic: Call Trace: [] ? panic+0xa7/0x16f [] ? oops_end+0xe4/0x100 [] ? no_context+0xfb/0x260 [] ? del_timer_sync+0x22/0x30 [] ? __bad_area_nosemaphore+0x125/0x1e0 [] ? process_timeout+0x0/0x10 [] ? bad_area_nosemaphore+0x13/0x20 [] ? __do_page_fault+0x31f/0x480 [] ? default_wake_function+0x0/0x20 [] ? free_msg+0x55/0x70 [mlx5_core] [] ? cmd_exec+0x124/0x840 [mlx5_core] [] ? find_busiest_group+0x244/0x9f0 [] ? do_page_fault+0x3e/0xa0 [] ? page_fault+0x25/0x30 [] ? cm_alloc_msg+0x35/0xc0 [ib_cm] [] ? ib_send_cm_dreq+0xb1/0x1e0 [ib_cm] [] ? cm_destroy_id+0x176/0x320 [ib_cm] [] ? ib_destroy_cm_id+0x10/0x20 [ib_cm] [] ? ipoib_cm_free_rx_reap_list+0xa7/0x110 [ib_ipoib] [] ? ipoib_cm_rx_reap+0x0/0x20 [ib_ipoib] [] ? ipoib_cm_rx_reap+0x15/0x20 [ib_ipoib] [] ? worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] ? kthread+0x96/0xa0 [] ? child_rip+0xa/0x20 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 Fixes: a977049dacde ("[PATCH] IB: Add the kernel CM implementation") Signed-off-by: Mark Bloch Signed-off-by: Erez Shitrit Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 126 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 110 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c99525512b34..71c7c4c328ef 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -80,6 +80,8 @@ static struct ib_cm { __be32 random_id_operand; struct list_head timewait_list; struct workqueue_struct *wq; + /* Sync on cm change port state */ + spinlock_t state_lock; } cm; /* Counter indexes ordered by attribute ID */ @@ -161,6 +163,8 @@ struct cm_port { struct ib_mad_agent *mad_agent; struct kobject port_obj; u8 port_num; + struct list_head cm_priv_prim_list; + struct list_head cm_priv_altr_list; struct cm_counter_group counter_group[CM_COUNTER_GROUPS]; }; @@ -241,6 +245,12 @@ struct cm_id_private { u8 service_timeout; u8 target_ack_delay; + struct list_head prim_list; + struct list_head altr_list; + /* Indicates that the send port mad is registered and av is set */ + int prim_send_port_not_ready; + int altr_send_port_not_ready; + struct list_head work_list; atomic_t work_count; }; @@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, struct ib_mad_agent *mad_agent; struct ib_mad_send_buf *m; struct ib_ah *ah; + struct cm_av *av; + unsigned long flags, flags2; + int ret = 0; + /* don't let the port to be released till the agent is down */ + spin_lock_irqsave(&cm.state_lock, flags2); + spin_lock_irqsave(&cm.lock, flags); + if (!cm_id_priv->prim_send_port_not_ready) + av = &cm_id_priv->av; + else if (!cm_id_priv->altr_send_port_not_ready && + (cm_id_priv->alt_av.port)) + av = &cm_id_priv->alt_av; + else { + pr_info("%s: not valid CM id\n", __func__); + ret = -ENODEV; + spin_unlock_irqrestore(&cm.lock, flags); + goto out; + } + spin_unlock_irqrestore(&cm.lock, flags); + /* Make sure the port haven't released the mad yet */ mad_agent = cm_id_priv->av.port->mad_agent; - ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr); - if (IS_ERR(ah)) - return PTR_ERR(ah); + if (!mad_agent) { + pr_info("%s: not a valid MAD agent\n", __func__); + ret = -ENODEV; + goto out; + } + ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr); + if (IS_ERR(ah)) { + ret = PTR_ERR(ah); + goto out; + } m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, - cm_id_priv->av.pkey_index, + av->pkey_index, 0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, GFP_ATOMIC, IB_MGMT_BASE_VERSION); if (IS_ERR(m)) { ib_destroy_ah(ah); - return PTR_ERR(m); + ret = PTR_ERR(m); + goto out; } /* Timeout set by caller if response is expected. */ @@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv, atomic_inc(&cm_id_priv->refcount); m->context[0] = cm_id_priv; *msg = m; - return 0; + +out: + spin_unlock_irqrestore(&cm.state_lock, flags2); + return ret; } static int cm_alloc_response_msg(struct cm_port *port, @@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc, grh, &av->ah_attr); } -static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) +static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av, + struct cm_id_private *cm_id_priv) { struct cm_device *cm_dev; struct cm_port *port = NULL; @@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) &av->ah_attr); av->timeout = path->packet_life_time + 1; - return 0; + spin_lock_irqsave(&cm.lock, flags); + if (&cm_id_priv->av == av) + list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list); + else if (&cm_id_priv->alt_av == av) + list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list); + else + ret = -EINVAL; + + spin_unlock_irqrestore(&cm.lock, flags); + + return ret; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) @@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device, spin_lock_init(&cm_id_priv->lock); init_completion(&cm_id_priv->comp); INIT_LIST_HEAD(&cm_id_priv->work_list); + INIT_LIST_HEAD(&cm_id_priv->prim_list); + INIT_LIST_HEAD(&cm_id_priv->altr_list); atomic_set(&cm_id_priv->work_count, -1); atomic_set(&cm_id_priv->refcount, 1); return &cm_id_priv->id; @@ -892,6 +945,15 @@ retest: break; } + spin_lock_irq(&cm.lock); + if (!list_empty(&cm_id_priv->altr_list) && + (!cm_id_priv->altr_send_port_not_ready)) + list_del(&cm_id_priv->altr_list); + if (!list_empty(&cm_id_priv->prim_list) && + (!cm_id_priv->prim_send_port_not_ready)) + list_del(&cm_id_priv->prim_list); + spin_unlock_irq(&cm.lock); + cm_free_id(cm_id->local_id); cm_deref_id(cm_id_priv); wait_for_completion(&cm_id_priv->comp); @@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av); + ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av, + cm_id_priv); if (ret) goto error1; if (param->alternate_path) { ret = cm_init_av_by_path(param->alternate_path, - &cm_id_priv->alt_av); + &cm_id_priv->alt_av, cm_id_priv); if (ret) goto error1; } @@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work) dev_put(gid_attr.ndev); } work->path[0].gid_type = gid_attr.gid_type; - ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); + ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av, + cm_id_priv); } if (ret) { int err = ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work) goto rejected; } if (req_msg->alt_local_lid) { - ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av); + ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av, + cm_id_priv); if (ret) { ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID, &work->path[0].sgid, @@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id, goto out; } - ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av); + ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av, + cm_id_priv); if (ret) goto out; cm_id_priv->alt_av.timeout = @@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work) cm_init_av_for_response(work->port, work->mad_recv_wc->wc, work->mad_recv_wc->recv_buf.grh, &cm_id_priv->av); - cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av); + cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av, + cm_id_priv); ret = atomic_inc_and_test(&cm_id_priv->work_count); if (!ret) list_add_tail(&work->list, &cm_id_priv->work_list); @@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, return -EINVAL; cm_id_priv = container_of(cm_id, struct cm_id_private, id); - ret = cm_init_av_by_path(param->path, &cm_id_priv->av); + ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv); if (ret) goto out; @@ -3468,7 +3535,9 @@ out: static int cm_migrate(struct ib_cm_id *cm_id) { struct cm_id_private *cm_id_priv; + struct cm_av tmp_av; unsigned long flags; + int tmp_send_port_not_ready; int ret = 0; cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id) (cm_id->lap_state == IB_CM_LAP_UNINIT || cm_id->lap_state == IB_CM_LAP_IDLE)) { cm_id->lap_state = IB_CM_LAP_IDLE; + /* Swap address vector */ + tmp_av = cm_id_priv->av; cm_id_priv->av = cm_id_priv->alt_av; + cm_id_priv->alt_av = tmp_av; + /* Swap port send ready state */ + tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready; + cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready; + cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready; } else ret = -EINVAL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device) port->cm_dev = cm_dev; port->port_num = i; + INIT_LIST_HEAD(&port->cm_priv_prim_list); + INIT_LIST_HEAD(&port->cm_priv_altr_list); + ret = cm_create_port_fs(port); if (ret) goto error1; @@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) { struct cm_device *cm_dev = client_data; struct cm_port *port; + struct cm_id_private *cm_id_priv; + struct ib_mad_agent *cur_mad_agent; struct ib_port_modify port_modify = { .clr_port_cap_mask = IB_PORT_CM_SUP }; @@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) port = cm_dev->port[i-1]; ib_modify_port(ib_device, port->port_num, 0, &port_modify); + /* Mark all the cm_id's as not valid */ + spin_lock_irq(&cm.lock); + list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) + cm_id_priv->altr_send_port_not_ready = 1; + list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) + cm_id_priv->prim_send_port_not_ready = 1; + spin_unlock_irq(&cm.lock); /* * We flush the queue here after the going_down set, this * verify that no new works will be queued in the recv handler, * after that we can call the unregister_mad_agent */ flush_workqueue(cm.wq); - ib_unregister_mad_agent(port->mad_agent); + spin_lock_irq(&cm.state_lock); + cur_mad_agent = port->mad_agent; + port->mad_agent = NULL; + spin_unlock_irq(&cm.state_lock); + ib_unregister_mad_agent(cur_mad_agent); cm_remove_port_fs(port); } + device_unregister(cm_dev->device); kfree(cm_dev); } @@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void) INIT_LIST_HEAD(&cm.device_list); rwlock_init(&cm.device_lock); spin_lock_init(&cm.lock); + spin_lock_init(&cm.state_lock); cm.listen_service_table = RB_ROOT; cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID); cm.remote_id_table = RB_ROOT; -- cgit v1.2.3-59-g8ed1b From aeb76df46d1158d5f7f3d30f993a1bb6ee9c67a0 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 31 Oct 2016 07:50:56 +0200 Subject: IB/core: Set routable RoCE gid type for ipv4/ipv6 networks On Thu, Oct 27, 2016 at 04:36:28PM +0300, Leon Romanovsky wrote: > From: Mark Bloch > > If the underlying netowrk type is ipv4 or ipv6 and the device supports > routable RoCE, prefer it so the traffic could cross subnets. > > Signed-off-by: Mark Bloch > Signed-off-by: Maor Gottlieb > Signed-off-by: Leon Romanovsky > --- Hi Doug, Please take the following v1 of this patch where I fixed spelling error from "netowrk" to be "network". Thanks. >From 09f96ba3e9b4442cfb44dca04c6726e55525c9c3 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 11 Sep 2016 06:25:10 +0000 Subject: [PATCH rdma-rc v1 3/6] IB/core: Set routable RoCE gid type for ipv4/ipv6 networks If the underlying network type is ipv4 or ipv6 and the device supports routable RoCE, prefer it so the traffic could cross subnets. Signed-off-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 36bf50ebb187..9ca0da0a37c4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2436,6 +2436,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) return 0; } +static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, + unsigned long supported_gids, + enum ib_gid_type default_gid) +{ + if ((network_type == RDMA_NETWORK_IPV4 || + network_type == RDMA_NETWORK_IPV6) && + test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) + return IB_GID_TYPE_ROCE_UDP_ENCAP; + + return default_gid; +} + static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; @@ -2461,6 +2473,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->num_paths = 1; if (addr->dev_addr.bound_dev_if) { + unsigned long supported_gids; + ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); if (!ndev) { ret = -ENODEV; @@ -2484,7 +2498,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) route->path_rec->net = &init_net; route->path_rec->ifindex = ndev->ifindex; - route->path_rec->gid_type = id_priv->gid_type; + supported_gids = roce_gid_type_mask_support(id_priv->id.device, + id_priv->id.port_num); + route->path_rec->gid_type = + cma_route_gid_type(addr->dev_addr.network, + supported_gids, + id_priv->gid_type); } if (!ndev) { ret = -ENODEV; -- cgit v1.2.3-59-g8ed1b From 61c3702863be9e9f1ef12ed5a5b17bae6cdfac0b Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:29 +0300 Subject: IB/core: Add missing check for addr_resolve callback return value When calling rdma_resolve_ip inside rdma_addr_find_l2_eth_by_grh, the return status of the request was ignored in the callback function causing a successful return and an empty dmac. Signed-off-by: Mark Bloch Signed-off-by: Alex Vesker Reviewed-by: Or Gerlitz Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index b136d3acc5bd..0f58f46dbad7 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel); struct resolve_cb_context { struct rdma_dev_addr *addr; struct completion comp; + int status; }; static void resolve_cb(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context) { - memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct - rdma_dev_addr)); + if (!status) + memcpy(((struct resolve_cb_context *)context)->addr, + addr, sizeof(struct rdma_dev_addr)); + ((struct resolve_cb_context *)context)->status = status; complete(&((struct resolve_cb_context *)context)->comp); } @@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, wait_for_completion(&ctx.comp); + ret = ctx.status; + if (ret) + return ret; + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); if (!dev) -- cgit v1.2.3-59-g8ed1b From 3c7ba5760ab8eedec01159b267bb9bfcffe522ac Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:31 +0300 Subject: IB/core: Avoid unsigned int overflow in sg_alloc_table sg_alloc_table gets unsigned int as parameter while the driver returns it as size_t. Check npages isn't greater than maximum unsigned int. Fixes: eeb8461e36c9 ("IB: Refactor umem to use linear SG table") Signed-off-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/umem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 224ad274ea0b..84b4eff90395 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -175,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, cur_base = addr & PAGE_MASK; - if (npages == 0) { + if (npages == 0 || npages > UINT_MAX) { ret = -EINVAL; goto out; } -- cgit v1.2.3-59-g8ed1b From 90be7c8ab72853ff9fc407f01518a898df1f3045 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 27 Oct 2016 16:36:39 +0300 Subject: IB/mlx5: Fix memory leak in query device We need to free dev->port when we fail to enable RoCE or initialize node data. Fixes: 0837e86a7a34 ('IB/mlx5: Add per port counters') Signed-off-by: Majd Dibbiny Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 22174774dbb8..bb61487861cc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3115,7 +3115,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) } err = init_node_data(dev); if (err) - goto err_dealloc; + goto err_free_port; mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); @@ -3125,7 +3125,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (ll == IB_LINK_LAYER_ETHERNET) { err = mlx5_enable_roce(dev); if (err) - goto err_dealloc; + goto err_free_port; } err = create_dev_resources(&dev->devr); -- cgit v1.2.3-59-g8ed1b From efd7f40082a0dfd112eb87ff2124467a5739216f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Oct 2016 16:36:40 +0300 Subject: IB/mlx5: Validate requested RQT size Validate that the requested size of RQT is supported by firmware. Fixes: c5f9092936fe ('IB/mlx5: Add Receive Work Queue Indirection table operations') Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 41f4c2afbcdd..2be0d06b27dc 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4815,6 +4815,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, udata->inlen)) return ERR_PTR(-EOPNOTSUPP); + if (init_attr->log_ind_tbl_size > + MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) { + mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n", + init_attr->log_ind_tbl_size, + MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)); + return ERR_PTR(-EINVAL); + } + min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); -- cgit v1.2.3-59-g8ed1b From 16b0e0695a73b68d8ca40288c8f9614ef208917b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 27 Oct 2016 16:36:41 +0300 Subject: IB/mlx5: Use cache line size to select CQE stride When creating kernel CQs use 128B CQE stride if the cache line size is 128B, 64B otherwise. This prevents multiple CQEs from residing in a 128B cache line, which can cause retries when there are concurrent read and writes in one cache line. Tested with IPoIB on PPC64, saw ~5% throughput improvement. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Daniel Jurgens Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 79d017baf6f4..fcd04b881ec1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -932,8 +932,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, if (err) goto err_create; } else { - /* for now choose 64 bytes till we have a proper interface */ - cqe_size = 64; + cqe_size = cache_line_size() == 128 ? 128 : 64; err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb, &index, &inlen); if (err) -- cgit v1.2.3-59-g8ed1b From 6bc1a656ab9f57f0112823b4a36930c9a29d1f89 Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Thu, 27 Oct 2016 16:36:42 +0300 Subject: IB/mlx5: Resolve soft lock on massive reg MRs When calling reg_mr of large MRs (e.g. 4GB) from multiple processes and MR caches can't supply the required amount of MRs the slow-path of MR allocation may be used. In this case we need to serialize the slow-path between the processes to avoid soft lock. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Moshe Lazer Signed-off-by: Maor Gottlieb Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 ++ drivers/infiniband/hw/mlx5/mr.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index dcdcd195fe53..7d689903c87c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -626,6 +626,8 @@ struct mlx5_ib_dev { struct mlx5_ib_resources devr; struct mlx5_mr_cache cache; struct timer_list delay_timer; + /* Prevents soft lock on massive reg MRs */ + struct mutex slow_path_mutex; int fill_delay; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d4ad672b905b..4e9012463c37 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int err; int i; + mutex_init(&dev->slow_path_mutex); cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); @@ -1182,9 +1183,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto error; } - if (!mr) + if (!mr) { + mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, page_shift, access_flags); + mutex_unlock(&dev->slow_path_mutex); + } if (IS_ERR(mr)) { err = PTR_ERR(mr); -- cgit v1.2.3-59-g8ed1b From dbaaff2a2caa03d472b5cc53a3fbfd415c97dc26 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:44 +0300 Subject: IB/mlx5: Fix fatal error dispatching When an internal error condition is detected, make sure to set the device inactive after dispatching the event so ULPs can get a notification of this event. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Reviewed-by: Mohamad Haj Yahia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bb61487861cc..a014ad38d889 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2311,14 +2311,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, { struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; struct ib_event ibev; - + bool fatal = false; u8 port = 0; switch (event) { case MLX5_DEV_EVENT_SYS_ERROR: - ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + fatal = true; break; case MLX5_DEV_EVENT_PORT_UP: @@ -2370,6 +2370,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, if (ibdev->ib_active) ib_dispatch_event(&ibev); + + if (fatal) + ibdev->ib_active = false; } static void get_ext_port_caps(struct mlx5_ib_dev *dev) -- cgit v1.2.3-59-g8ed1b From a1ab8402d15d2305d2315d96ec3294bfdf16587e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:46 +0300 Subject: IB/mlx5: Fix NULL pointer dereference on debug print For XRC QP CQs may not exist. Check before attempting dereference. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2be0d06b27dc..59c4c89460d1 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2052,8 +2052,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n", qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn, - to_mcq(init_attr->recv_cq)->mcq.cqn, - to_mcq(init_attr->send_cq)->mcq.cqn); + init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1, + init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1); qp->trans_qp.xrcdn = xrcdn; -- cgit v1.2.3-59-g8ed1b From 37995116fecfce2b61ee3da6e73b3e394c6818f9 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 10 Nov 2016 11:30:54 +0200 Subject: IB/mlx4: Check gid_index return value Check the returned GID index value and return an error if it is invalid. Fixes: 5070cd2239bd ('IB/mlx4: Replace mechanism for RoCE GID management') Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Reviewed-by: Yuval Shaia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/ah.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5fc623362731..b9bf0759f10a 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr if (vlan_tag < 0x1000) vlan_tag |= (ah_attr->sl & 7) << 13; ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); - ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); + ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index); + if (ret < 0) + return ERR_PTR(ret); + ah->av.eth.gid_index = ret; ah->av.eth.vlan = cpu_to_be16(vlan_tag); ah->av.eth.hop_limit = ah_attr->grh.hop_limit; if (ah_attr->static_rate) { -- cgit v1.2.3-59-g8ed1b From 593ff73bcfdc79f79a8a0df55504f75ad3e5d1a9 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 10 Nov 2016 11:30:55 +0200 Subject: IB/mlx4: Fix create CQ error flow Currently, if ib_copy_to_udata fails, the CQ won't be deleted from the radix tree and the HW (HW2SW). Fixes: 225c7b1feef1 ('IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters') Signed-off-by: Matan Barak Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/cq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 1ea686b9e0f9..6a0fec357dae 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (context) if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) { err = -EFAULT; - goto err_dbmap; + goto err_cq_free; } return &cq->ibcq; +err_cq_free: + mlx4_cq_free(dev->dev, &cq->mcq); + err_dbmap: if (context) mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db); -- cgit v1.2.3-59-g8ed1b From 1454ca3a97e147bb91e98b087446c39cf6692a48 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:14 +0200 Subject: IB/rxe: Fix kernel panic in UDP tunnel with GRO and RX checksum Missing initialization of udp_tunnel_sock_cfg causes to following kernel panic, while kernel tries to execute gro_receive(). While being there, we converted udp_port_cfg to use the same initialization scheme as udp_tunnel_sock_cfg. ------------[ cut here ]------------ kernel tried to execute NX-protected page - exploit attempt? (uid: 0) BUG: unable to handle kernel paging request at ffffffffa0588c50 IP: [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] PGD 1c09067 PUD 1c0a063 PMD bb394067 PTE 80000000ad5e8163 Oops: 0011 [#1] SMP Modules linked in: ib_rxe ip6_udp_tunnel udp_tunnel CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.7.0-rc3+ #2 Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 task: ffff880235e4e680 ti: ffff880235e68000 task.ti: ffff880235e68000 RIP: 0010:[] [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] RSP: 0018:ffff880237343c80 EFLAGS: 00010282 RAX: 00000000dffe482d RBX: ffff8800ae330900 RCX: 000000002001b712 RDX: ffff8800ae330900 RSI: ffff8800ae102578 RDI: ffff880235589c00 RBP: ffff880237343cb0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800ae33e262 R13: ffff880235589c00 R14: 0000000000000014 R15: ffff8800ae102578 FS: 0000000000000000(0000) GS:ffff880237340000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa0588c50 CR3: 0000000001c06000 CR4: 00000000000006e0 Stack: ffffffff8160860e ffff8800ae330900 ffff8800ae102578 0000000000000014 000000000000004e ffff8800ae102578 ffff880237343ce0 ffffffff816088fb 0000000000000000 ffff8800ae330900 0000000000000000 00000000ffad0000 Call Trace: [] ? udp_gro_receive+0xde/0x130 [] udp4_gro_receive+0x10b/0x2d0 [] inet_gro_receive+0x1d3/0x270 [] dev_gro_receive+0x269/0x3b0 [] napi_gro_receive+0x38/0x120 [] mlx5e_handle_rx_cqe+0x27e/0x340 [mlx5_core] [] mlx5e_poll_rx_cq+0x66/0x6d0 [mlx5_core] [] mlx5e_napi_poll+0x8e/0x400 [mlx5_core] [] net_rx_action+0x160/0x380 [] __do_softirq+0xd7/0x2c5 [] irq_exit+0xf5/0x100 [] do_IRQ+0x56/0xd0 [] common_interrupt+0x8c/0x8c [] ? native_safe_halt+0x6/0x10 [] default_idle+0x1e/0xd0 [] arch_cpu_idle+0xf/0x20 [] default_idle_call+0x3c/0x50 [] cpu_startup_entry+0x323/0x3c0 [] start_secondary+0x15c/0x1a0 RIP [] __this_module+0x50/0xffffffffffff8400 [ib_rxe] RSP CR2: ffffffffa0588c50 ---[ end trace 489ee31fa7614ac5 ]--- Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled ---[ end Kernel panic - not syncing: Fatal exception in interrupt ------------[ cut here ]------------ Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index b8258e4f0aea..ffff5a54cb34 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, { int err; struct socket *sock; - struct udp_port_cfg udp_cfg; - struct udp_tunnel_sock_cfg tnl_cfg; - - memset(&udp_cfg, 0, sizeof(udp_cfg)); + struct udp_port_cfg udp_cfg = {0}; + struct udp_tunnel_sock_cfg tnl_cfg = {0}; if (ipv6) { udp_cfg.family = AF_INET6; @@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return ERR_PTR(err); } - tnl_cfg.sk_user_data = NULL; tnl_cfg.encap_type = 1; tnl_cfg.encap_rcv = rxe_udp_encap_recv; - tnl_cfg.encap_destroy = NULL; /* Setup UDP tunnel */ setup_udp_tunnel_sock(net, sock, &tnl_cfg); -- cgit v1.2.3-59-g8ed1b From 002e062e13db10973adb8302f231e48b477c7ccf Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:15 +0200 Subject: IB/rxe: Fix handling of erroneous WR To correctly handle a erroneous WR this fix does the following 1. Make sure the bad WQE causes a user completion event. 2. Call rxe_completer to handle the erred WQE. Before the fix, when rxe_requester found a bad WQE, it changed its status to IB_WC_LOC_PROT_ERR and exit with 0 for non RC QPs. If this was the 1st WQE then there would be no ACK to invoke the completer and this bad WQE would be stuck in the QP's send-q. On top of that the requester exiting with 0 caused rxe_do_task to endlessly invoke rxe_requester, resulting in a soft-lockup attached below. In case the WQE was not the 1st and rxe_completer did get a chance to handle the bad WQE, it did not cause a complete event since the WQE's IB_SEND_SIGNALED flag was not set. Setting WQE status to IB_SEND_SIGNALED is subject to IBA spec version 1.2.1, section 10.7.3.1 Signaled Completions. NMI watchdog: BUG: soft lockup - CPU#7 stuck for 22s! [] ? rxe_pool_get_index+0x35/0xb0 [rdma_rxe] [] lookup_mem+0x3c/0xc0 [rdma_rxe] [] copy_data+0x1c4/0x230 [rdma_rxe] [] rxe_requester+0x9d0/0x1100 [rdma_rxe] [] ? kfree_skbmem+0x5a/0x60 [] rxe_do_task+0x89/0xf0 [rdma_rxe] [] rxe_run_task+0x12/0x30 [rdma_rxe] [] rxe_post_send+0x41a/0x550 [rdma_rxe] [] ? __kmalloc+0x182/0x200 [] ? down_read+0x12/0x40 [] ib_uverbs_post_send+0x532/0x540 [ib_uverbs] [] ? tcp_sendmsg+0x402/0xb80 [] ib_uverbs_write+0x18c/0x3f0 [ib_uverbs] [] ? inet_recvmsg+0x7e/0xb0 [] ? sock_recvmsg+0x3d/0x50 [] __vfs_write+0x37/0x140 [] vfs_write+0xb2/0x1b0 [] SyS_write+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 832846b73ea0..22bd9630dcd9 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -696,7 +696,8 @@ next_wqe: qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - goto complete; + __rxe_do_task(&qp->comp.task); + return 0; } payload = mtu; } @@ -745,13 +746,17 @@ err: wqe->status = IB_WC_LOC_PROT_ERR; wqe->state = wqe_state_error; -complete: - if (qp_type(qp) != IB_QPT_RC) { - while (rxe_completer(qp) == 0) - ; - } - - return 0; + /* + * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS + * ---------8<---------8<------------- + * ...Note that if a completion error occurs, a Work Completion + * will always be generated, even if the signaling + * indicator requests an Unsignaled Completion. + * ---------8<---------8<------------- + */ + wqe->wr.send_flags |= IB_SEND_SIGNALED; + __rxe_do_task(&qp->comp.task); + return -EAGAIN; exit: return -EAGAIN; -- cgit v1.2.3-59-g8ed1b From aa75b07b478a774b1432e2df1be5cd8ae834de0f Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:17 +0200 Subject: IB/rxe: Clear queue buffer when modifying QP to reset RXE resets the send-q only once in rxe_qp_init_req() when QP is created, but when the QP is reused after QP reset, the send-q holds previous garbage data. This garbage data wrongly fails CQEs that otherwise should have completed successfully. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + drivers/infiniband/sw/rxe/rxe_queue.c | 9 +++++++++ drivers/infiniband/sw/rxe/rxe_queue.h | 2 ++ 3 files changed, 12 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index b8036cfbce04..95aaaa282a04 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); + rxe_queue_reset(qp->sq.queue); } /* cleanup attributes */ diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index 08274254eb88..d14bf496d62d 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -84,6 +84,15 @@ err1: return -EINVAL; } +inline void rxe_queue_reset(struct rxe_queue *q) +{ + /* queue is comprised from header and the memory + * of the actual queue. See "struct rxe_queue_buf" in rxe_queue.h + * reset only the queue itself and not the management header + */ + memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf)); +} + struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size) diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 239fd609c31e..8c8641c87817 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe, size_t buf_size, struct rxe_mmap_info **ip_p); +void rxe_queue_reset(struct rxe_queue *q); + struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe, int *num_elem, unsigned int elem_size); -- cgit v1.2.3-59-g8ed1b From 6d931308f55faaef3f30bd0346c47f99528b229d Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:18 +0200 Subject: IB/rxe: Update qp state for user query The method rxe_qp_error() transitions QP to error state and make sure the QP is drained. It did not though update the QP state for user's query. This patch fixes this. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_qp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 95aaaa282a04..c3e60e4bde6e 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -574,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp) { qp->req.state = QP_STATE_ERROR; qp->resp.state = QP_STATE_ERROR; + qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ rxe_run_task(&qp->resp.task, 1); -- cgit v1.2.3-59-g8ed1b From 4ff522ea47944ffd3d4d27023ace8bc6a722c834 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Tue, 18 Oct 2016 14:04:39 -0700 Subject: iw_cxgb4: set *bad_wr for post_send/post_recv errors There are a few cases in c4iw_post_send() and c4iw_post_receive() where *bad_wr is not set when an error is returned. This can cause a crash if the application tries to use bad_wr. Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/qp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index f57deba6717c..5790e1dbd618 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -797,11 +797,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -EINVAL; } num_wrs = t4_sq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { @@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, spin_lock_irqsave(&qhp->lock, flag); if (t4_wq_in_error(&qhp->wq)) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -EINVAL; } num_wrs = t4_rq_avail(&qhp->wq); if (num_wrs == 0) { spin_unlock_irqrestore(&qhp->lock, flag); + *bad_wr = wr; return -ENOMEM; } while (wr) { -- cgit v1.2.3-59-g8ed1b From 5c6b2aaf9316fd0983c0c999d920306ddc65bd2d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 3 Nov 2016 12:09:38 -0700 Subject: iw_cxgb4: invalidate the mr when posting a read_w_inv wr Also, rearrange things a bit to have a common c4iw_invalidate_mr() function used everywhere that we need to invalidate. Fixes: 49b53a93a64a ("iw_cxgb4: add fast-path for small REG_MR operations") Signed-off-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/hw/cxgb4/cq.c | 17 +++-------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 12 ++++++++++++ drivers/infiniband/hw/cxgb4/qp.c | 16 ++++++++-------- 4 files changed, 24 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 867b8cf82be8..19c6477af19f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -666,18 +666,6 @@ skip_cqe: return ret; } -static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey) -{ - struct c4iw_mr *mhp; - unsigned long flags; - - spin_lock_irqsave(&rhp->lock, flags); - mhp = get_mhp(rhp, rkey >> 8); - if (mhp) - mhp->attr.state = 0; - spin_unlock_irqrestore(&rhp->lock, flags); -} - /* * Get one cq entry from c4iw and map it to openib. * @@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; - invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); } } else { switch (CQE_OPCODE(&cqe)) { @@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) /* Invalidate the MR if the fastreg failed */ if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) - invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); + c4iw_invalidate_mr(qhp->rhp, + CQE_WRID_FR_STAG(&cqe)); break; default: printk(KERN_ERR MOD "Unexpected opcode %d " diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 7e7f79e55006..4788e1a46fde 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -999,6 +999,6 @@ extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_drain_rq(struct ib_qp *qp); void c4iw_drain_sq(struct ib_qp *qp); - +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 80e27749420a..410408f886c1 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr) kfree(mhp); return 0; } + +void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey) +{ + struct c4iw_mr *mhp; + unsigned long flags; + + spin_lock_irqsave(&rhp->lock, flags); + mhp = get_mhp(rhp, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + spin_unlock_irqrestore(&rhp->lock, flags); +} diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 5790e1dbd618..b7ac97b27c88 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) +static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) { - struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8); - - mhp->attr.state = 0; wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); @@ -842,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_RDMA_READ_WITH_INV: fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; - if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) + if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { + c4iw_invalidate_mr(qhp->rhp, + wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; - else + } else { fw_flags = 0; + } err = build_rdma_read(wqe, wr, &len16); if (err) break; @@ -878,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; - err = build_inv_stag(qhp->rhp, wqe, wr, &len16); + err = build_inv_stag(wqe, wr, &len16); + c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); break; default: PDBG("%s post of type=%d TBD!\n", __func__, -- cgit v1.2.3-59-g8ed1b From e47112d9d6009bf6b7438cedc0270316d6b0370d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 15 Nov 2016 15:58:15 -0800 Subject: net: dsa: b53: Fix VLAN usage and how we treat CPU port We currently have a fundamental problem in how we treat the CPU port and its VLAN membership. As soon as a second VLAN is configured to be untagged, the CPU automatically becomes untagged for that VLAN as well, and yet, we don't gracefully make sure that the CPU becomes tagged in the other VLANs it could be a member of. This results in only one VLAN being effectively usable from the CPU's perspective. Instead of having some pretty complex logic which tries to maintain the CPU port's default VLAN and its untagged properties, just do something very simple which consists in neither altering the CPU port's PVID settings, nor its untagged settings: - whenever a VLAN is added, the CPU is automatically a member of this VLAN group, as a tagged member - PVID settings for downstream ports do not alter the CPU port's PVID since it now is part of all VLANs in the system This means that a typical example where e.g: LAN ports are in VLAN1, and WAN port is in VLAN2, now require having two VLAN interfaces for the host to properly terminate and send traffic from/to. Fixes: Fixes: a2482d2ce349 ("net: dsa: b53: Plug in VLAN support") Reported-by: Hartmut Knaack Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_common.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 7717b19dc806..947adda3397d 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -962,9 +962,10 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, vl->members |= BIT(port) | BIT(cpu_port); if (untagged) - vl->untag |= BIT(port) | BIT(cpu_port); + vl->untag |= BIT(port); else - vl->untag &= ~(BIT(port) | BIT(cpu_port)); + vl->untag &= ~BIT(port); + vl->untag &= ~BIT(cpu_port); b53_set_vlan_entry(dev, vid, vl); b53_fast_age_vlan(dev, vid); @@ -973,8 +974,6 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, if (pvid) { b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), vlan->vid_end); - b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), - vlan->vid_end); b53_fast_age_vlan(dev, vid); } } @@ -984,7 +983,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, { struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - unsigned int cpu_port = dev->cpu_port; struct b53_vlan *vl; u16 vid; u16 pvid; @@ -997,8 +995,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, b53_get_vlan_entry(dev, vid, vl); vl->members &= ~BIT(port); - if ((vl->members & BIT(cpu_port)) == BIT(cpu_port)) - vl->members = 0; if (pvid == vid) { if (is5325(dev) || is5365(dev)) @@ -1007,18 +1003,14 @@ static int b53_vlan_del(struct dsa_switch *ds, int port, pvid = 0; } - if (untagged) { + if (untagged) vl->untag &= ~(BIT(port)); - if ((vl->untag & BIT(cpu_port)) == BIT(cpu_port)) - vl->untag = 0; - } b53_set_vlan_entry(dev, vid, vl); b53_fast_age_vlan(dev, vid); } b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), pvid); - b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), pvid); b53_fast_age_vlan(dev, pvid); return 0; -- cgit v1.2.3-59-g8ed1b From e5f6f564fd191d365fcd775c06a732a488205588 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 16 Nov 2016 06:31:52 -0800 Subject: bnxt: add a missing rcu synchronization Add a missing synchronize_net() call to avoid potential use after free, since we explicitly call napi_hash_del() to factorize the RCU grace period. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Signed-off-by: Eric Dumazet Cc: Michael Chan Acked-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c6909660e097..e18635b2a002 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4934,6 +4934,10 @@ static void bnxt_del_napi(struct bnxt *bp) napi_hash_del(&bnapi->napi); netif_napi_del(&bnapi->napi); } + /* We called napi_hash_del() before netif_napi_del(), we need + * to respect an RCU grace period before freeing napi structures. + */ + synchronize_net(); } static void bnxt_init_napi(struct bnxt *bp) -- cgit v1.2.3-59-g8ed1b From 4a59015372840a6fc35d7fd40638a9d5dc3ec958 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 13 Nov 2016 21:23:34 +0100 Subject: xattr: Fix setting security xattrs on sockfs The IOP_XATTR flag is set on sockfs because sockfs supports getting the "system.sockprotoname" xattr. Since commit 6c6ef9f2, this flag is checked for setxattr support as well. This is wrong on sockfs because security xattr support there is supposed to be provided by security_inode_setsecurity. The smack security module relies on socket labels (xattrs). Fix this by adding a security xattr handler on sockfs that returns -EAGAIN, and by checking for -EAGAIN in setxattr. We cannot simply check for -EOPNOTSUPP in setxattr because there are filesystems that neither have direct security xattr support nor support via security_inode_setsecurity. A more proper fix might be to move the call to security_inode_setsecurity into sockfs, but it's not clear to me if that is safe: we would end up calling security_inode_post_setxattr after that as well. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro --- fs/xattr.c | 22 ++++++++++++++-------- net/socket.c | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 3368659c471e..2d13b4e62fae 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -170,7 +170,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { struct inode *inode = dentry->d_inode; - int error = -EOPNOTSUPP; + int error = -EAGAIN; int issec = !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); @@ -183,15 +183,21 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (issec) { - const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; - + } else { if (unlikely(is_bad_inode(inode))) return -EIO; - error = security_inode_setsecurity(inode, suffix, value, - size, flags); - if (!error) - fsnotify_xattr(dentry); + } + if (error == -EAGAIN) { + error = -EOPNOTSUPP; + + if (issec) { + const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; + + error = security_inode_setsecurity(inode, suffix, value, + size, flags); + if (!error) + fsnotify_xattr(dentry); + } } return error; diff --git a/net/socket.c b/net/socket.c index 272518b087c8..73dc69f9681e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -341,8 +341,23 @@ static const struct xattr_handler sockfs_xattr_handler = { .get = sockfs_xattr_get, }; +static int sockfs_security_xattr_set(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *suffix, const void *value, + size_t size, int flags) +{ + /* Handled by LSM. */ + return -EAGAIN; +} + +static const struct xattr_handler sockfs_security_xattr_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .set = sockfs_security_xattr_set, +}; + static const struct xattr_handler *sockfs_xattr_handlers[] = { &sockfs_xattr_handler, + &sockfs_security_xattr_handler, NULL }; -- cgit v1.2.3-59-g8ed1b From 680bb946a1ae04fe0ff369a4965f76b48c07dc54 Mon Sep 17 00:00:00 2001 From: Abhi Das Date: Wed, 16 Nov 2016 21:44:23 -0600 Subject: fix iov_iter_advance() for ITER_PIPE iov_iter_advance() needs to decrement iter->count by the number of bytes we'd moved beyond. Normal flavours do that, but ITER_PIPE doesn't and ITER_PIPE generic_file_read_iter() for O_DIRECT files ends up with a bogus fallback to page cache read, resulting in incorrect values for file offset and bytes read. Signed-off-by: Abhi Das Signed-off-by: Al Viro --- lib/iov_iter.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f0c7f1481bae..f2bd21b93dfc 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -683,10 +683,11 @@ static void pipe_advance(struct iov_iter *i, size_t size) struct pipe_inode_info *pipe = i->pipe; struct pipe_buffer *buf; int idx = i->idx; - size_t off = i->iov_offset; + size_t off = i->iov_offset, orig_sz; if (unlikely(i->count < size)) size = i->count; + orig_sz = size; if (size) { if (off) /* make it relative to the beginning of buffer */ @@ -713,6 +714,7 @@ static void pipe_advance(struct iov_iter *i, size_t size) pipe->nrbufs--; } } + i->count -= orig_sz; } void iov_iter_advance(struct iov_iter *i, size_t size) -- cgit v1.2.3-59-g8ed1b From d5afc1b68a6ddc27746d31f775025afe75ec8122 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 16 Nov 2016 10:24:15 -0800 Subject: dmaengine: cppi41: More PM runtime fixes Fix use of u32 instead of int for checking for negative errors values as pointed out by Dan Carpenter . And while testing the PM runtime error path by randomly returning failed values in runtime resume, I noticed two more places that need fixing: - If pm_runtime_get_sync() fails in probe, we still need to do pm_runtime_put_sync() to keep the use count happy. We could call pm_runtime_put_noidle() on the error path, but we're just going to call pm_runtime_disable() after that so pm_runtime_put_sync() will do what we want - We should print an error if pm_runtime_get_sync() fails in cppi41_dma_alloc_chan_resources() so we know where it happens Reported-by: Dan Carpenter Fixes: 740b4be3f742 ("dmaengine: cpp41: Fix handling of error path") Signed-off-by: Tony Lindgren Signed-off-by: Vinod Koul --- drivers/dma/cppi41.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index 4b52126c13cf..d5ba43a87a68 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -317,11 +317,12 @@ static irqreturn_t cppi41_irq(int irq, void *data) while (val) { u32 desc, len; + int error; - status = pm_runtime_get(cdd->ddev.dev); - if (status < 0) + error = pm_runtime_get(cdd->ddev.dev); + if (error < 0) dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", - __func__, status); + __func__, error); q_num = __fls(val); val &= ~(1 << q_num); @@ -367,6 +368,8 @@ static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) error = pm_runtime_get_sync(cdd->ddev.dev); if (error < 0) { + dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", + __func__, error); pm_runtime_put_noidle(cdd->ddev.dev); return error; @@ -1072,8 +1075,8 @@ err_chans: deinit_cppi41(dev, cdd); err_init_cppi: pm_runtime_dont_use_autosuspend(dev); - pm_runtime_put_sync(dev); err_get_sync: + pm_runtime_put_sync(dev); pm_runtime_disable(dev); iounmap(cdd->usbss_mem); iounmap(cdd->ctrl_mem); -- cgit v1.2.3-59-g8ed1b From a5a40d4624cd2328c69768f6eb41716fc249d7be Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 23:29:10 +0200 Subject: crypto: caam - fix type mismatch warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building the caam driver on arm64 produces a harmless warning: drivers/crypto/caam/caamalg.c:140:139: warning: comparison of distinct pointer types lacks a cast We can use min_t to tell the compiler which type we want it to use here. Fixes: 5ecf8ef9103c ("crypto: caam - fix sg dump") Signed-off-by: Arnd Bergmann Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index f5a63ba97023..954a64c7757b 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -137,7 +137,7 @@ static void dbg_dump_sg(const char *level, const char *prefix_str, } buf = it_page + it->offset; - len = min(tlen, it->length); + len = min_t(size_t, tlen, it->length); print_hex_dump(level, prefix_str, prefix_type, rowsize, groupsize, buf, len, ascii); tlen -= len; -- cgit v1.2.3-59-g8ed1b From cfc44a4d147ea605d66ccb917cc24467d15ff867 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 16 Nov 2016 10:27:02 -0800 Subject: net: check dead netns for peernet2id_alloc() Andrei reports we still allocate netns ID from idr after we destroy it in cleanup_net(). cleanup_net(): ... idr_destroy(&net->netns_ids); ... list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); -> rollback_registered_many() -> rtmsg_ifinfo_build_skb() -> rtnl_fill_ifinfo() -> peernet2id_alloc() After that point we should not even access net->netns_ids, we should check the death of the current netns as early as we can in peernet2id_alloc(). For net-next we can consider to avoid sending rtmsg totally, it is a good optimization for netns teardown path. Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids") Reported-by: Andrei Vagin Cc: Nicolas Dichtel Signed-off-by: Cong Wang Acked-by: Andrei Vagin Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/net_namespace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index f61c0e02a413..7001da910c6b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -219,6 +219,8 @@ int peernet2id_alloc(struct net *net, struct net *peer) bool alloc; int id; + if (atomic_read(&net->count) == 0) + return NETNSA_NSID_NOT_ASSIGNED; spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); -- cgit v1.2.3-59-g8ed1b From 48c1699d5335bc045b50989a06b1c526b17a25ff Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Nov 2016 15:20:36 +0100 Subject: of_mdio: fix node leak in of_phy_register_fixed_link error path Make sure to drop the of_node reference also on failure to parse the speed property in of_phy_register_fixed_link(). Fixes: 3be2a49e5c08 ("of: provide a binding for fixed link PHYs") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index b470f7e3521d..8f4648383fb2 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -456,8 +456,11 @@ int of_phy_register_fixed_link(struct device_node *np) status.link = 1; status.duplex = of_property_read_bool(fixed_link_node, "full-duplex"); - if (of_property_read_u32(fixed_link_node, "speed", &status.speed)) + if (of_property_read_u32(fixed_link_node, "speed", + &status.speed)) { + of_node_put(fixed_link_node); return -EINVAL; + } status.pause = of_property_read_bool(fixed_link_node, "pause"); status.asym_pause = of_property_read_bool(fixed_link_node, "asym-pause"); -- cgit v1.2.3-59-g8ed1b From 3ae30f4ce65e9d4de274b1472169ab3c27f5c666 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Nov 2016 15:20:37 +0100 Subject: of_mdio: fix device reference leak in of_phy_find_device Make sure to drop the reference taken by bus_find_device() before returning NULL from of_phy_find_device() when the found device is not a PHY. Fixes: 6ed742363b9c ("of: of_mdio: Ensure mdio device is a PHY") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 8f4648383fb2..5a3145a02547 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -292,6 +292,7 @@ struct phy_device *of_phy_find_device(struct device_node *phy_np) mdiodev = to_mdio_device(d); if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY) return to_phy_device(d); + put_device(d); } return NULL; -- cgit v1.2.3-59-g8ed1b From 13c9d934a5a1d04f055c20c2253090e9afd9a5d1 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 16 Nov 2016 15:20:38 +0100 Subject: net: phy: fixed_phy: fix of_node leak in fixed_phy_unregister Make sure to drop the of_node reference taken in fixed_phy_register() when deregistering a PHY. Fixes: a75951217472 ("net: phy: extend fixed driver with fixed_phy_register()") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c649c101bbab..eb5167210681 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -279,7 +279,7 @@ EXPORT_SYMBOL_GPL(fixed_phy_register); void fixed_phy_unregister(struct phy_device *phy) { phy_device_remove(phy); - + of_node_put(phy->mdio.dev.of_node); fixed_phy_del(phy->mdio.addr); } EXPORT_SYMBOL_GPL(fixed_phy_unregister); -- cgit v1.2.3-59-g8ed1b From b5c2d49544e5930c96e2632a7eece3f4325a1888 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Nov 2016 16:26:46 +0100 Subject: ip6_tunnel: disable caching when the traffic class is inherited If an ip6 tunnel is configured to inherit the traffic class from the inner header, the dst_cache must be disabled or it will foul the policy routing. The issue is apprently there since at leat Linux-2.6.12-rc2. Reported-by: Liam McBirnie Cc: Liam McBirnie Acked-by: Hannes Frederic Sowa Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 87784560dc46..0a4759b89da2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1034,6 +1034,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + bool use_cache = false; u8 hop_limit; int err = -1; @@ -1066,7 +1067,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); neigh_release(neigh); - } else if (!fl6->flowi6_mark) + } else if (!(t->parms.flags & + (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) { + /* enable the cache only only if the routing decision does + * not depend on the current inner header value + */ + use_cache = true; + } + + if (use_cache) dst = dst_cache_get(&t->dst_cache); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) @@ -1150,7 +1159,7 @@ route_lookup: if (t->encap.type != TUNNEL_ENCAP_NONE) goto tx_err_dst_release; } else { - if (!fl6->flowi6_mark && ndst) + if (use_cache && ndst) dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); } skb_dst_set(skb, dst); -- cgit v1.2.3-59-g8ed1b From 5d1904204c99596b50a700f092fe49d78edba400 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 10 Nov 2016 17:16:33 +0800 Subject: mremap: fix race between mremap() and page cleanning Prior to 3.15, there was a race between zap_pte_range() and page_mkclean() where writes to a page could be lost. Dave Hansen discovered by inspection that there is a similar race between move_ptes() and page_mkclean(). We've been able to reproduce the issue by enlarging the race window with a msleep(), but have not been able to hit it without modifying the code. So, we think it's a real issue, but is difficult or impossible to hit in practice. The zap_pte_range() issue is fixed by commit 1cf35d47712d("mm: split 'tlb_flush_mmu()' into tlb flushing and memory freeing parts"). And this patch is to fix the race between page_mkclean() and mremap(). Here is one possible way to hit the race: suppose a process mmapped a file with READ | WRITE and SHARED, it has two threads and they are bound to 2 different CPUs, e.g. CPU1 and CPU2. mmap returned X, then thread 1 did a write to addr X so that CPU1 now has a writable TLB for addr X on it. Thread 2 starts mremaping from addr X to Y while thread 1 cleaned the page and then did another write to the old addr X again. The 2nd write from thread 1 could succeed but the value will get lost. thread 1 thread 2 (bound to CPU1) (bound to CPU2) 1: write 1 to addr X to get a writeable TLB on this CPU 2: mremap starts 3: move_ptes emptied PTE for addr X and setup new PTE for addr Y and then dropped PTL for X and Y 4: page laundering for N by doing fadvise FADV_DONTNEED. When done, pageframe N is deemed clean. 5: *write 2 to addr X 6: tlb flush for addr X 7: munmap (Y, pagesize) to make the page unmapped 8: fadvise with FADV_DONTNEED again to kick the page off the pagecache 9: pread the page from file to verify the value. If 1 is there, it means we have lost the written 2. *the write may or may not cause segmentation fault, it depends on if the TLB is still on the CPU. Please note that this is only one specific way of how the race could occur, it didn't mean that the race could only occur in exact the above config, e.g. more than 2 threads could be involved and fadvise() could be done in another thread, etc. For anonymous pages, they could race between mremap() and page reclaim: THP: a huge PMD is moved by mremap to a new huge PMD, then the new huge PMD gets unmapped/splitted/pagedout before the flush tlb happened for the old huge PMD in move_page_tables() and we could still write data to it. The normal anonymous page has similar situation. To fix this, check for any dirty PTE in move_ptes()/move_huge_pmd() and if any, did the flush before dropping the PTL. If we did the flush for every move_ptes()/move_huge_pmd() call then we do not need to do the flush in move_pages_tables() for the whole range. But if we didn't, we still need to do the whole range flush. Alternatively, we can track which part of the range is flushed in move_ptes()/move_huge_pmd() and which didn't to avoid flushing the whole range in move_page_tables(). But that would require multiple tlb flushes for the different sub-ranges and should be less efficient than the single whole range flush. KBuild test on my Sandybridge desktop doesn't show any noticeable change. v4.9-rc4: real 5m14.048s user 32m19.800s sys 4m50.320s With this commit: real 5m13.888s user 32m19.330s sys 4m51.200s Reported-by: Dave Hansen Signed-off-by: Aaron Lu Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- mm/huge_memory.c | 9 ++++++++- mm/mremap.c | 30 +++++++++++++++++++++--------- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b9f65d99873..e35e6de633b9 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -22,7 +22,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd); + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush); extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index cdcd25cb30fe..eff3de359d50 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1426,11 +1426,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, - pmd_t *old_pmd, pmd_t *new_pmd) + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) { spinlock_t *old_ptl, *new_ptl; pmd_t pmd; struct mm_struct *mm = vma->vm_mm; + bool force_flush = false; if ((old_addr & ~HPAGE_PMD_MASK) || (new_addr & ~HPAGE_PMD_MASK) || @@ -1455,6 +1456,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, new_ptl = pmd_lockptr(mm, new_pmd); if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + if (pmd_present(*old_pmd) && pmd_dirty(*old_pmd)) + force_flush = true; pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); @@ -1467,6 +1470,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); if (new_ptl != old_ptl) spin_unlock(new_ptl); + if (force_flush) + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); + else + *need_flush = true; spin_unlock(old_ptl); return true; } diff --git a/mm/mremap.c b/mm/mremap.c index da22ad2a5678..6ccecc03f56a 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -104,11 +104,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, unsigned long old_addr, unsigned long old_end, struct vm_area_struct *new_vma, pmd_t *new_pmd, - unsigned long new_addr, bool need_rmap_locks) + unsigned long new_addr, bool need_rmap_locks, bool *need_flush) { struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + bool force_flush = false; + unsigned long len = old_end - old_addr; /* * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma @@ -146,6 +148,14 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, new_pte++, new_addr += PAGE_SIZE) { if (pte_none(*old_pte)) continue; + + /* + * We are remapping a dirty PTE, make sure to + * flush TLB before we drop the PTL for the + * old PTE or we may race with page_mkclean(). + */ + if (pte_present(*old_pte) && pte_dirty(*old_pte)) + force_flush = true; pte = ptep_get_and_clear(mm, old_addr, old_pte); pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr); pte = move_soft_dirty_pte(pte); @@ -156,6 +166,10 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, if (new_ptl != old_ptl) spin_unlock(new_ptl); pte_unmap(new_pte - 1); + if (force_flush) + flush_tlb_range(vma, old_end - len, old_end); + else + *need_flush = true; pte_unmap_unlock(old_pte - 1, old_ptl); if (need_rmap_locks) drop_rmap_locks(vma); @@ -201,13 +215,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma, if (need_rmap_locks) take_rmap_locks(vma); moved = move_huge_pmd(vma, old_addr, new_addr, - old_end, old_pmd, new_pmd); + old_end, old_pmd, new_pmd, + &need_flush); if (need_rmap_locks) drop_rmap_locks(vma); - if (moved) { - need_flush = true; + if (moved) continue; - } } split_huge_pmd(vma, old_pmd, old_addr); if (pmd_trans_unstable(old_pmd)) @@ -220,11 +233,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma, extent = next - new_addr; if (extent > LATENCY_LIMIT) extent = LATENCY_LIMIT; - move_ptes(vma, old_pmd, old_addr, old_addr + extent, - new_vma, new_pmd, new_addr, need_rmap_locks); - need_flush = true; + move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma, + new_pmd, new_addr, need_rmap_locks, &need_flush); } - if (likely(need_flush)) + if (need_flush) flush_tlb_range(vma, old_end-len, old_addr); mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end); -- cgit v1.2.3-59-g8ed1b From 30a391a13ab9215d7569da4e1773c5bb4deed96d Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Wed, 16 Nov 2016 17:16:10 -0500 Subject: net sched filters: pass netlink message flags in event notification Userland client should be able to read an event, and reflect it back to the kernel, therefore it needs to extract complete set of netlink flags. For example, this will allow "tc monitor" to distinguish Add and Replace operations. Signed-off-by: Roman Mashak Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2b2a7974e4bb..8e93d4afe5ea 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -112,7 +112,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, for (it_chain = chain; (tp = rtnl_dereference(*it_chain)) != NULL; it_chain = &tp->next) - tfilter_notify(net, oskb, n, tp, 0, event, false); + tfilter_notify(net, oskb, n, tp, n->nlmsg_flags, event, false); } /* Select new prio value from the range, managed by kernel. */ @@ -430,7 +430,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, if (!skb) return -ENOBUFS; - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, event) <= 0) { kfree_skb(skb); return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From e9f01049d1ea4679a3258b8423fe54bae424ee0e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Nov 2016 10:26:39 +1000 Subject: Revert "drm/mediatek: fix a typo of OD_CFG to OD_RELAYMODE" This reverts commit 83ba62bc700bab710b22be3a1bf6cf973f754273. Signed-off-by: Dave Airlie --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index aa5f20fabd10..df33b3ca6ffd 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w, unsigned int bpc) { writel(w << 16 | h, comp->regs + DISP_OD_SIZE); - writel(OD_RELAYMODE, comp->regs + OD_CFG); + writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE); mtk_dither_set(comp, bpc, DISP_OD_CFG); } -- cgit v1.2.3-59-g8ed1b From 7d40c2cf080950eab63a0747482027f5f1dae0d3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 18 Nov 2016 10:27:00 +1000 Subject: Revert "drm/mediatek: set vblank_disable_allowed to true" This reverts commit f752fff611b99f5679224f3990a1f531ea64b1ec. Signed-off-by: Dave Airlie --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 0b2ae47eb52c..cf83f6507ec8 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -217,7 +217,6 @@ static int mtk_drm_kms_init(struct drm_device *drm) if (ret < 0) goto err_component_unbind; - drm->vblank_disable_allowed = true; drm_kms_helper_poll_init(drm); drm_mode_config_reset(drm); -- cgit v1.2.3-59-g8ed1b From 1c8018f7a7a60a649260fdd7e8645a356299e920 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Wed, 2 Nov 2016 08:57:04 +0100 Subject: ipmi/bt-bmc: change compatible node to 'aspeed, ast2400-ibt-bmc' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Aspeed SoCs have two BT interfaces : one is IPMI compliant and the other is H8S/2168 compliant. The current ipmi/bt-bmc driver implements the IPMI version and we should reflect its nature in the compatible node name using 'aspeed,ast2400-ibt-bmc' instead of 'aspeed,ast2400-bt-bmc'. The latter should be used for a H8S interface driver if it is implemented one day. Signed-off-by: Cédric Le Goater Signed-off-by: Olof Johansson --- .../bindings/ipmi/aspeed,ast2400-bt-bmc.txt | 23 ---------------------- .../bindings/ipmi/aspeed,ast2400-ibt-bmc.txt | 23 ++++++++++++++++++++++ drivers/char/ipmi/bt-bmc.c | 4 ++-- 3 files changed, 25 insertions(+), 25 deletions(-) delete mode 100644 Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt create mode 100644 Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt deleted file mode 100644 index fbbacd958240..000000000000 --- a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt +++ /dev/null @@ -1,23 +0,0 @@ -* Aspeed BT (Block Transfer) IPMI interface - -The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs -(BaseBoard Management Controllers) and the BT interface can be used to -perform in-band IPMI communication with their host. - -Required properties: - -- compatible : should be "aspeed,ast2400-bt-bmc" -- reg: physical address and size of the registers - -Optional properties: - -- interrupts: interrupt generated by the BT interface. without an - interrupt, the driver will operate in poll mode. - -Example: - - ibt@1e789140 { - compatible = "aspeed,ast2400-bt-bmc"; - reg = <0x1e789140 0x18>; - interrupts = <8>; - }; diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt new file mode 100644 index 000000000000..6f28969af9dc --- /dev/null +++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt @@ -0,0 +1,23 @@ +* Aspeed BT (Block Transfer) IPMI interface + +The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs +(BaseBoard Management Controllers) and the BT interface can be used to +perform in-band IPMI communication with their host. + +Required properties: + +- compatible : should be "aspeed,ast2400-ibt-bmc" +- reg: physical address and size of the registers + +Optional properties: + +- interrupts: interrupt generated by the BT interface. without an + interrupt, the driver will operate in poll mode. + +Example: + + ibt@1e789140 { + compatible = "aspeed,ast2400-ibt-bmc"; + reg = <0x1e789140 0x18>; + interrupts = <8>; + }; diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index b49e61320952..fc9e8891eae3 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -484,7 +484,7 @@ static int bt_bmc_remove(struct platform_device *pdev) } static const struct of_device_id bt_bmc_match[] = { - { .compatible = "aspeed,ast2400-bt-bmc" }, + { .compatible = "aspeed,ast2400-ibt-bmc" }, { }, }; @@ -502,4 +502,4 @@ module_platform_driver(bt_bmc_driver); MODULE_DEVICE_TABLE(of, bt_bmc_match); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Alistair Popple "); -MODULE_DESCRIPTION("Linux device interface to the BT interface"); +MODULE_DESCRIPTION("Linux device interface to the IPMI BT interface"); -- cgit v1.2.3-59-g8ed1b From 68d85d0e03eab60c238ebe673c7cea1cf70275d4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 29 Oct 2016 16:31:17 +0000 Subject: i2c: digicolor: use clk_disable_unprepare instead of clk_unprepare since clk_prepare_enable() is used to get i2c->clk, we should use clk_disable_unprepare() to release it for the error path. Signed-off-by: Wei Yongjun Acked-by: Baruch Siach Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-digicolor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c index 49f2084f7bb5..50813a24c541 100644 --- a/drivers/i2c/busses/i2c-digicolor.c +++ b/drivers/i2c/busses/i2c-digicolor.c @@ -347,7 +347,7 @@ static int dc_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(&i2c->adap); if (ret < 0) { - clk_unprepare(i2c->clk); + clk_disable_unprepare(i2c->clk); return ret; } -- cgit v1.2.3-59-g8ed1b From 96ed1fe511a8b4948e53f3bad431d8737e8f231f Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 18 Nov 2016 14:08:56 +1100 Subject: powerpc/mm/radix: Invalidate ERAT on tlbiel for POWER9 DD1 On POWER9 DD1, when we do a local TLB invalidate we also need to explicitly invalidate the ERAT. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 1 + arch/powerpc/mm/tlb-radix.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 0132831b3081..c56ea8c84abb 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -460,5 +460,6 @@ #define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \ ((IH & 0x7) << 21)) +#define PPC_INVALIDATE_ERAT PPC_SLBIA(7) #endif /* _ASM_POWERPC_PPC_OPCODE_H */ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index bda8c43be78a..3493cf4e0452 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { __tlbiel_pid(pid, set, ric); } + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); return; } @@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } static inline void _tlbie_va(unsigned long va, unsigned long pid, -- cgit v1.2.3-59-g8ed1b From 9853a55ef1bb66d7411136046060bbfb69c714fa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 15 Nov 2016 12:05:11 +0100 Subject: cfg80211: limit scan results cache size It's possible to make scanning consume almost arbitrary amounts of memory, e.g. by sending beacon frames with random BSSIDs at high rates while somebody is scanning. Limit the number of BSS table entries we're willing to cache to 1000, limiting maximum memory usage to maybe 4-5MB, but lower in practice - that would be the case for having both full-sized beacon and probe response frames for each entry; this seems not possible in practice, so a limit of 1000 entries will likely be closer to 0.5 MB. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/wireless/core.h | 1 + net/wireless/scan.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/net/wireless/core.h b/net/wireless/core.h index 08d2e948c9ad..f0c0c8a48c92 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -71,6 +71,7 @@ struct cfg80211_registered_device { struct list_head bss_list; struct rb_root bss_tree; u32 bss_generation; + u32 bss_entries; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; struct cfg80211_sched_scan_request __rcu *sched_scan_req; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b5bd58d0f731..35ad69fd0838 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -57,6 +57,19 @@ * also linked into the probe response struct. */ +/* + * Limit the number of BSS entries stored in mac80211. Each one is + * a bit over 4k at most, so this limits to roughly 4-5M of memory. + * If somebody wants to really attack this though, they'd likely + * use small beacons, and only one type of frame, limiting each of + * the entries to a much smaller size (in order to generate more + * entries in total, so overhead is bigger.) + */ +static int bss_entries_limit = 1000; +module_param(bss_entries_limit, int, 0644); +MODULE_PARM_DESC(bss_entries_limit, + "limit to number of scan BSS entries (per wiphy, default 1000)"); + #define IEEE80211_SCAN_RESULT_EXPIRE (30 * HZ) static void bss_free(struct cfg80211_internal_bss *bss) @@ -137,6 +150,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev, list_del_init(&bss->list); rb_erase(&bss->rbn, &rdev->bss_tree); + rdev->bss_entries--; + WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list), + "rdev bss entries[%d]/list[empty:%d] corruption\n", + rdev->bss_entries, list_empty(&rdev->bss_list)); bss_ref_put(rdev, bss); return true; } @@ -163,6 +180,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev, rdev->bss_generation++; } +static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev) +{ + struct cfg80211_internal_bss *bss, *oldest = NULL; + bool ret; + + lockdep_assert_held(&rdev->bss_lock); + + list_for_each_entry(bss, &rdev->bss_list, list) { + if (atomic_read(&bss->hold)) + continue; + + if (!list_empty(&bss->hidden_list) && + !bss->pub.hidden_beacon_bss) + continue; + + if (oldest && time_before(oldest->ts, bss->ts)) + continue; + oldest = bss; + } + + if (WARN_ON(!oldest)) + return false; + + /* + * The callers make sure to increase rdev->bss_generation if anything + * gets removed (and a new entry added), so there's no need to also do + * it here. + */ + + ret = __cfg80211_unlink_bss(rdev, oldest); + WARN_ON(!ret); + return ret; +} + void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool send_message) { @@ -689,6 +740,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, const u8 *ie; int i, ssidlen; u8 fold = 0; + u32 n_entries = 0; ies = rcu_access_pointer(new->pub.beacon_ies); if (WARN_ON(!ies)) @@ -712,6 +764,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, /* This is the bad part ... */ list_for_each_entry(bss, &rdev->bss_list, list) { + /* + * we're iterating all the entries anyway, so take the + * opportunity to validate the list length accounting + */ + n_entries++; + if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid)) continue; if (bss->pub.channel != new->pub.channel) @@ -740,6 +798,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev, new->pub.beacon_ies); } + WARN_ONCE(n_entries != rdev->bss_entries, + "rdev bss entries[%d]/list[len:%d] corruption\n", + rdev->bss_entries, n_entries); + return true; } @@ -894,7 +956,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, } } + if (rdev->bss_entries >= bss_entries_limit && + !cfg80211_bss_expire_oldest(rdev)) { + kfree(new); + goto drop; + } + list_add_tail(&new->list, &rdev->bss_list); + rdev->bss_entries++; rb_insert_bss(rdev, new); found = new; } -- cgit v1.2.3-59-g8ed1b From 9e3f7a29694049edd728e2400ab57ad7553e5aa9 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 16 Nov 2016 09:20:57 +0000 Subject: arm64: KVM: pmu: Fix AArch32 cycle counter access We're missing the handling code for the cycle counter accessed from a 32bit guest, leading to unexpected results. Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Wei Huang Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f302fdb3a030..87e7e6608cd8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -597,8 +597,14 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, idx = ARMV8_PMU_CYCLE_IDX; } else { - BUG(); + return false; } + } else if (r->CRn == 0 && r->CRm == 9) { + /* PMCCNTR */ + if (pmu_access_event_counter_el0_disabled(vcpu)) + return false; + + idx = ARMV8_PMU_CYCLE_IDX; } else if (r->CRn == 14 && (r->CRm & 12) == 8) { /* PMEVCNTRn_EL0 */ if (pmu_access_event_counter_el0_disabled(vcpu)) @@ -606,7 +612,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); } else { - BUG(); + return false; } if (!pmu_counter_idx_valid(vcpu, idx)) -- cgit v1.2.3-59-g8ed1b From b112c84a6ff035271d41d548c10215f18443d6a6 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Wed, 16 Nov 2016 11:09:20 -0600 Subject: KVM: arm64: Fix the issues when guest PMCCFILTR is configured KVM calls kvm_pmu_set_counter_event_type() when PMCCFILTR is configured. But this function can't deals with PMCCFILTR correctly because the evtCount bits of PMCCFILTR, which is reserved 0, conflits with the SW_INCR event type of other PMXEVTYPER registers. To fix it, when eventsel == 0, this function shouldn't return immediately; instead it needs to check further if select_idx is ARMV8_PMU_CYCLE_IDX. Another issue is that KVM shouldn't copy the eventsel bits of PMCCFILTER blindly to attr.config. Instead it ought to convert the request to the "cpu cycle" event type (i.e. 0x11). To support this patch and to prevent duplicated definitions, a limited set of ARMv8 perf event types were relocated from perf_event.c to asm/perf_event.h. Cc: stable@vger.kernel.org # 4.6+ Acked-by: Will Deacon Signed-off-by: Wei Huang Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/perf_event.h | 10 +++++++++- arch/arm64/kernel/perf_event.c | 10 +--------- virt/kvm/arm/pmu.c | 8 +++++--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 2065f46fa740..38b6a2b49d68 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -46,7 +46,15 @@ #define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */ #define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */ -#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */ +/* + * PMUv3 event types: required events + */ +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 /* * Event filters for PMUv3 diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a9310a69fffd..57ae9d9ed9bb 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -31,17 +31,9 @@ /* * ARMv8 PMUv3 Performance Events handling code. - * Common event types. + * Common event types (some are defined in asm/perf_event.h). */ -/* Required events. */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 - /* At least one of the following is required. */ #define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 #define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 6e9c40eea208..69ccce308458 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) continue; type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; - if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); @@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, eventsel = data & ARMV8_PMU_EVTYPE_EVENT; /* Software increment event does't need to be backed by a perf event */ - if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR) + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && + select_idx != ARMV8_PMU_CYCLE_IDX) return; memset(&attr, 0, sizeof(struct perf_event_attr)); @@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ - attr.config = eventsel; + attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ? + ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; counter = kvm_pmu_get_counter_value(vcpu, select_idx); /* The initial sample period (overflow count) of an event. */ -- cgit v1.2.3-59-g8ed1b From cac4a185405d4415eca269cae976438b44a37ae0 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 17 Nov 2016 15:46:23 +0530 Subject: powerpc/mm: Fix missing update of HID register on secondary CPUs We need to update on secondaries for the selected MMU mode. Fixes: ad410674f560 ("powerpc/mm: Update the HID bit when switching from radix to hash") Reported-by: Michael Neuling Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 4 ++++ arch/powerpc/mm/pgtable-radix.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 44d3c3a38e3e..5503078090cd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1029,6 +1029,10 @@ void hash__early_init_mmu_secondary(void) { /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_hash(); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) mtspr(SPRN_SDR1, _SDR1); else diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ed7bddc456b7..688b54517655 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -388,6 +388,10 @@ void radix__early_init_mmu_secondary(void) * update partition table control register and UPRT */ if (!firmware_has_feature(FW_FEATURE_LPAR)) { + + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + update_hid_for_radix(); + lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); -- cgit v1.2.3-59-g8ed1b From a8348bca2944d397a528772f5c0ccb47a8b58af4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 17 Nov 2016 22:07:58 +0800 Subject: crypto: algif_hash - Fix NULL hash crash with shash Recently algif_hash has been changed to allow null hashes. This triggers a bug when used with an shash algorithm whereby it will cause a crash during the digest operation. This patch fixes it by avoiding the digest operation and instead doing an init followed by a final which avoids the buggy code in shash. This patch also ensures that the result buffer is freed after an error so that it is not returned as a genuine hash result on the next recv call. The shash/ahash wrapper code will be fixed later to handle this case correctly. Fixes: 493b2ed3f760 ("crypto: algif_hash - Handle NULL hashes correctly") Signed-off-by: Herbert Xu Tested-by: Laura Abbott --- crypto/algif_hash.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 2d8466f9e49b..05e21b464433 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -214,23 +214,26 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); - if (ctx->more) { + if (!result) { + err = af_alg_wait_for_completion( + crypto_ahash_init(&ctx->req), + &ctx->completion); + if (err) + goto unlock; + } + + if (!result || ctx->more) { ctx->more = 0; err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req), &ctx->completion); if (err) goto unlock; - } else if (!result) { - err = af_alg_wait_for_completion( - crypto_ahash_digest(&ctx->req), - &ctx->completion); } err = memcpy_to_msg(msg, ctx->result, len); - hash_free_result(sk, ctx); - unlock: + hash_free_result(sk, ctx); release_sock(sk); return err ?: len; -- cgit v1.2.3-59-g8ed1b From 06ba3b2133dc203e1e9bc36cee7f0839b79a9e8b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Thu, 17 Nov 2016 09:14:25 -0600 Subject: net: sky2: Fix shutdown crash The sky2 frequently crashes during machine shutdown with: sky2_get_stats+0x60/0x3d8 [sky2] dev_get_stats+0x68/0xd8 rtnl_fill_stats+0x54/0x140 rtnl_fill_ifinfo+0x46c/0xc68 rtmsg_ifinfo_build_skb+0x7c/0xf0 rtmsg_ifinfo.part.22+0x3c/0x70 rtmsg_ifinfo+0x50/0x5c netdev_state_change+0x4c/0x58 linkwatch_do_dev+0x50/0x88 __linkwatch_run_queue+0x104/0x1a4 linkwatch_event+0x30/0x3c process_one_work+0x140/0x3e0 worker_thread+0x60/0x44c kthread+0xdc/0xf0 ret_from_fork+0x10/0x50 This is caused by the sky2 being called after it has been shutdown. A previous thread about this can be found here: https://lkml.org/lkml/2016/4/12/410 An alternative fix is to assure that IFF_UP gets cleared by calling dev_close() during shutdown. This is similar to what the bnx2/tg3/xgene and maybe others are doing to assure that the driver isn't being called following _shutdown(). Signed-off-by: Jeremy Linton Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index f05ea56dcff2..941c8e2c944e 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -5220,6 +5220,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sky2_suspend, sky2_resume); static void sky2_shutdown(struct pci_dev *pdev) { + struct sky2_hw *hw = pci_get_drvdata(pdev); + int port; + + for (port = 0; port < hw->ports; port++) { + struct net_device *ndev = hw->dev[port]; + + rtnl_lock(); + if (netif_running(ndev)) { + dev_close(ndev); + netif_device_detach(ndev); + } + rtnl_unlock(); + } sky2_suspend(&pdev->dev); pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev)); pci_set_power_state(pdev, PCI_D3hot); -- cgit v1.2.3-59-g8ed1b From c46ab7e08c79be7400f6d59edbc6f26a91941c5a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:39:58 +0100 Subject: net: ethernet: ti: cpsw: fix bad register access in probe error path Make sure to keep the platform device runtime-resumed throughout probe to avoid accessing the CPSW registers in the error path (e.g. for deferred probe) with clocks disabled: Unhandled fault: external abort on non-linefetch (0x1008) at 0xd0872d08 ... [] (cpsw_ale_control_set) from [] (cpsw_ale_destroy+0x2c/0x44) [] (cpsw_ale_destroy) from [] (cpsw_probe+0xbd0/0x10c4) [] (cpsw_probe) from [] (platform_drv_probe+0x5c/0xc0) Fixes: df828598a755 ("netdev: driver: ethernet: Add TI CPSW driver") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c6cff3d2ff05..f60f8ab7c1e3 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2641,13 +2641,12 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_runtime_disable_ret; } cpsw->version = readl(&cpsw->regs->id_ver); - pm_runtime_put_sync(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(cpsw->wr_regs)) { ret = PTR_ERR(cpsw->wr_regs); - goto clean_runtime_disable_ret; + goto clean_pm_runtime_put_ret; } memset(&dma_params, 0, sizeof(dma_params)); @@ -2684,7 +2683,7 @@ static int cpsw_probe(struct platform_device *pdev) default: dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version); ret = -ENODEV; - goto clean_runtime_disable_ret; + goto clean_pm_runtime_put_ret; } for (i = 0; i < cpsw->data.slaves; i++) { struct cpsw_slave *slave = &cpsw->slaves[i]; @@ -2713,7 +2712,7 @@ static int cpsw_probe(struct platform_device *pdev) if (!cpsw->dma) { dev_err(priv->dev, "error initializing dma\n"); ret = -ENOMEM; - goto clean_runtime_disable_ret; + goto clean_pm_runtime_put_ret; } cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); @@ -2815,12 +2814,16 @@ static int cpsw_probe(struct platform_device *pdev) } } + pm_runtime_put(&pdev->dev); + return 0; clean_ale_ret: cpsw_ale_destroy(cpsw->ale); clean_dma_ret: cpdma_ctlr_destroy(cpsw->dma); +clean_pm_runtime_put_ret: + pm_runtime_put_sync(&pdev->dev); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); clean_ndev_ret: -- cgit v1.2.3-59-g8ed1b From 86e1d5adcef961eb383ce4eacbe0ef22f06e2045 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:39:59 +0100 Subject: net: ethernet: ti: cpsw: fix mdio device reference leak Make sure to drop the reference taken by of_find_device_by_node() when looking up an mdio device from a phy_id property during probe. Fixes: 549985ee9c72 ("cpsw: simplify the setup of the register pointers") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index f60f8ab7c1e3..84c5d214557e 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2397,6 +2397,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, } snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); + put_device(&mdio->dev); } else { dev_err(&pdev->dev, "No slave[%d] phy_id, phy-handle, or fixed-link property\n", -- cgit v1.2.3-59-g8ed1b From a4e32b0d0a26ba2f2ba1c65bd403d06ccc1df29c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:00 +0100 Subject: net: ethernet: ti: cpsw: fix deferred probe Make sure to deregister all child devices also on probe errors to avoid leaks and to fix probe deferral: cpsw 4a100000.ethernet: omap_device: omap_device_enable() called from invalid state 1 cpsw 4a100000.ethernet: use pm_runtime_put_sync_suspend() in driver? cpsw: probe of 4a100000.ethernet failed with error -22 Add generic helper to undo the effects of cpsw_probe_dt(), which will also be used in a follow-on patch to fix further leaks that have been introduced more recently. Note that the platform device is now runtime-resumed before registering any child devices in order to make sure that it is synchronously suspended after having deregistered the children in the error path. Fixes: 1fb19aa730e4 ("net: cpsw: Add parent<->child relation support between cpsw and mdio") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 84c5d214557e..5d14abb06486 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2441,6 +2441,11 @@ no_phy_slave: return 0; } +static void cpsw_remove_dt(struct platform_device *pdev) +{ + of_platform_depopulate(&pdev->dev); +} + static int cpsw_probe_dual_emac(struct cpsw_priv *priv) { struct cpsw_common *cpsw = priv->cpsw; @@ -2585,10 +2590,19 @@ static int cpsw_probe(struct platform_device *pdev) /* Select default pin state */ pinctrl_pm_select_default_state(&pdev->dev); + /* Need to enable clocks with runtime PM api to access module + * registers + */ + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) { + pm_runtime_put_noidle(&pdev->dev); + goto clean_runtime_disable_ret; + } + if (cpsw_probe_dt(&cpsw->data, pdev)) { dev_err(&pdev->dev, "cpsw: platform data missing\n"); ret = -ENODEV; - goto clean_runtime_disable_ret; + goto clean_dt_ret; } data = &cpsw->data; cpsw->rx_ch_num = 1; @@ -2609,7 +2623,7 @@ static int cpsw_probe(struct platform_device *pdev) GFP_KERNEL); if (!cpsw->slaves) { ret = -ENOMEM; - goto clean_runtime_disable_ret; + goto clean_dt_ret; } for (i = 0; i < data->slaves; i++) cpsw->slaves[i].slave_num = i; @@ -2621,7 +2635,7 @@ static int cpsw_probe(struct platform_device *pdev) if (IS_ERR(clk)) { dev_err(priv->dev, "fck is not found\n"); ret = -ENODEV; - goto clean_runtime_disable_ret; + goto clean_dt_ret; } cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000; @@ -2629,25 +2643,17 @@ static int cpsw_probe(struct platform_device *pdev) ss_regs = devm_ioremap_resource(&pdev->dev, ss_res); if (IS_ERR(ss_regs)) { ret = PTR_ERR(ss_regs); - goto clean_runtime_disable_ret; + goto clean_dt_ret; } cpsw->regs = ss_regs; - /* Need to enable clocks with runtime PM api to access module - * registers - */ - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) { - pm_runtime_put_noidle(&pdev->dev); - goto clean_runtime_disable_ret; - } cpsw->version = readl(&cpsw->regs->id_ver); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(cpsw->wr_regs)) { ret = PTR_ERR(cpsw->wr_regs); - goto clean_pm_runtime_put_ret; + goto clean_dt_ret; } memset(&dma_params, 0, sizeof(dma_params)); @@ -2684,7 +2690,7 @@ static int cpsw_probe(struct platform_device *pdev) default: dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version); ret = -ENODEV; - goto clean_pm_runtime_put_ret; + goto clean_dt_ret; } for (i = 0; i < cpsw->data.slaves; i++) { struct cpsw_slave *slave = &cpsw->slaves[i]; @@ -2713,7 +2719,7 @@ static int cpsw_probe(struct platform_device *pdev) if (!cpsw->dma) { dev_err(priv->dev, "error initializing dma\n"); ret = -ENOMEM; - goto clean_pm_runtime_put_ret; + goto clean_dt_ret; } cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); @@ -2823,7 +2829,8 @@ clean_ale_ret: cpsw_ale_destroy(cpsw->ale); clean_dma_ret: cpdma_ctlr_destroy(cpsw->dma); -clean_pm_runtime_put_ret: +clean_dt_ret: + cpsw_remove_dt(pdev); pm_runtime_put_sync(&pdev->dev); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); @@ -2850,7 +2857,7 @@ static int cpsw_remove(struct platform_device *pdev) cpsw_ale_destroy(cpsw->ale); cpdma_ctlr_destroy(cpsw->dma); - of_platform_depopulate(&pdev->dev); + cpsw_remove_dt(pdev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); if (cpsw->data.dual_emac) -- cgit v1.2.3-59-g8ed1b From 8cbcc466fd4abd38a14b9d9b76c63a2cb7006554 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:01 +0100 Subject: net: ethernet: ti: cpsw: fix of_node and phydev leaks Make sure to drop references taken and deregister devices registered during probe on probe errors (including deferred probe) and driver unbind. Specifically, PHY of-node references were never released and fixed-link PHY devices were never deregistered. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link PHY") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5d14abb06486..c3b78bc4fe58 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2443,6 +2443,41 @@ no_phy_slave: static void cpsw_remove_dt(struct platform_device *pdev) { + struct net_device *ndev = platform_get_drvdata(pdev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_platform_data *data = &cpsw->data; + struct device_node *node = pdev->dev.of_node; + struct device_node *slave_node; + int i = 0; + + for_each_available_child_of_node(node, slave_node) { + struct cpsw_slave_data *slave_data = &data->slave_data[i]; + + if (strcmp(slave_node->name, "slave")) + continue; + + if (of_phy_is_fixed_link(slave_node)) { + struct phy_device *phydev; + + phydev = of_phy_find_device(slave_node); + if (phydev) { + fixed_phy_unregister(phydev); + /* Put references taken by + * of_phy_find_device() and + * of_phy_register_fixed_link(). + */ + phy_device_free(phydev); + phy_device_free(phydev); + } + } + + of_node_put(slave_data->phy_node); + + i++; + if (i == data->slaves) + break; + } + of_platform_depopulate(&pdev->dev); } -- cgit v1.2.3-59-g8ed1b From a7fe9d466f6a33558a38c7ca9d58bcc83512d577 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:02 +0100 Subject: net: ethernet: ti: cpsw: fix secondary-emac probe error path Make sure to deregister the primary device in case the secondary emac fails to probe. kernel BUG at /home/johan/work/omicron/src/linux/net/core/dev.c:7743! ... [] (free_netdev) from [] (cpsw_probe+0x9cc/0xe50) [] (cpsw_probe) from [] (platform_drv_probe+0x5c/0xc0) Fixes: d9ba8f9e6298 ("driver: net: ethernet: cpsw: dual emac interface implementation") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c3b78bc4fe58..11b2daef3158 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2852,7 +2852,7 @@ static int cpsw_probe(struct platform_device *pdev) ret = cpsw_probe_dual_emac(priv); if (ret) { cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); - goto clean_ale_ret; + goto clean_unregister_netdev_ret; } } @@ -2860,6 +2860,8 @@ static int cpsw_probe(struct platform_device *pdev) return 0; +clean_unregister_netdev_ret: + unregister_netdev(ndev); clean_ale_ret: cpsw_ale_destroy(cpsw->ale); clean_dma_ret: -- cgit v1.2.3-59-g8ed1b From 3420ea88509f9d585b39f36e737022faf0286d9a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:03 +0100 Subject: net: ethernet: ti: cpsw: add missing sanity check Make sure to check for allocation failures before dereferencing a NULL-pointer during probe. Fixes: 649a1688c960 ("net: ethernet: ti: cpsw: create common struct to hold shared driver data") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 11b2daef3158..1387299030e4 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2588,6 +2588,9 @@ static int cpsw_probe(struct platform_device *pdev) int irq; cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL); + if (!cpsw) + return -ENOMEM; + cpsw->dev = &pdev->dev; ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES); -- cgit v1.2.3-59-g8ed1b From 23a09873221c02106cf767a86743a55873f0d05b Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 17 Nov 2016 17:40:04 +0100 Subject: net: ethernet: ti: cpsw: fix fixed-link phy probe deferral Make sure to propagate errors from of_phy_register_fixed_link() which can fail with -EPROBE_DEFER. Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link PHY") Signed-off-by: Johan Hovold Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1387299030e4..58947aae31c7 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2375,8 +2375,11 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, * to the PHY is the Ethernet MAC DT node. */ ret = of_phy_register_fixed_link(slave_node); - if (ret) + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "failed to register fixed-link phy: %d\n", ret); return ret; + } slave_data->phy_node = of_node_get(slave_node); } else if (parp) { u32 phyid; @@ -2637,11 +2640,10 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_runtime_disable_ret; } - if (cpsw_probe_dt(&cpsw->data, pdev)) { - dev_err(&pdev->dev, "cpsw: platform data missing\n"); - ret = -ENODEV; + ret = cpsw_probe_dt(&cpsw->data, pdev); + if (ret) goto clean_dt_ret; - } + data = &cpsw->data; cpsw->rx_ch_num = 1; cpsw->tx_ch_num = 1; -- cgit v1.2.3-59-g8ed1b From 06a77b07e3b44aea2b3c0e64de420ea2cfdcbaa9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 17 Nov 2016 15:55:26 -0800 Subject: af_unix: conditionally use freezable blocking calls in read Commit 2b15af6f95 ("af_unix: use freezable blocking calls in read") converts schedule_timeout() to its freezable version, it was probably correct at that time, but later, commit 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") breaks the strong requirement for a freezable sleep, according to commit 0f9548ca1091: We shouldn't try_to_freeze if locks are held. Holding a lock can cause a deadlock if the lock is later acquired in the suspend or hibernate path (e.g. by dpm). Holding a lock can also cause a deadlock in the case of cgroup_freezer if a lock is held inside a frozen cgroup that is later acquired by a process outside that group. The pipe_lock is still held at that point. So use freezable version only for the recvmsg call path, avoid impact for Android. Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") Reported-by: Dmitry Vyukov Cc: Tejun Heo Cc: Colin Cross Cc: Rafael J. Wysocki Cc: Hannes Frederic Sowa Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/unix/af_unix.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5d1c14a2f268..2358f2690ec5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2199,7 +2199,8 @@ out: * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last, unsigned int last_len) + struct sk_buff *last, unsigned int last_len, + bool freezable) { struct sk_buff *tail; DEFINE_WAIT(wait); @@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); unix_state_unlock(sk); - timeo = freezable_schedule_timeout(timeo); + if (freezable) + timeo = freezable_schedule_timeout(timeo); + else + timeo = schedule_timeout(timeo); unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) @@ -2250,7 +2254,8 @@ struct unix_stream_read_state { unsigned int splice_flags; }; -static int unix_stream_read_generic(struct unix_stream_read_state *state) +static int unix_stream_read_generic(struct unix_stream_read_state *state, + bool freezable) { struct scm_cookie scm; struct socket *sock = state->socket; @@ -2330,7 +2335,7 @@ again: mutex_unlock(&u->iolock); timeo = unix_stream_data_wait(sk, timeo, last, - last_len); + last_len, freezable); if (signal_pending(current)) { err = sock_intr_errno(timeo); @@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, .flags = flags }; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, true); } static int unix_stream_splice_actor(struct sk_buff *skb, @@ -2503,7 +2508,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, flags & SPLICE_F_NONBLOCK) state.flags = MSG_DONTWAIT; - return unix_stream_read_generic(&state); + return unix_stream_read_generic(&state, false); } static int unix_shutdown(struct socket *sock, int mode) -- cgit v1.2.3-59-g8ed1b From 0f5258cd91e9d78a1ee30696314bec3c33321a93 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 18 Nov 2016 09:41:46 +0000 Subject: netns: fix get_net_ns_by_fd(int pid) typo The argument to get_net_ns_by_fd() is a /proc/$PID/ns/net file descriptor not a pid. Fix the typo. Signed-off-by: Stefan Hajnoczi Acked-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index fc4f757107df..0940598c002f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -170,7 +170,7 @@ static inline struct net *copy_net_ns(unsigned long flags, extern struct list_head net_namespace_list; struct net *get_net_ns_by_pid(pid_t pid); -struct net *get_net_ns_by_fd(int pid); +struct net *get_net_ns_by_fd(int fd); #ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); -- cgit v1.2.3-59-g8ed1b From f82ef3e10a870acc19fa04f80ef5877eaa26f41e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 18 Nov 2016 15:50:39 +0100 Subject: rtnetlink: fix FDB size computation Add missing NDA_VLAN attribute's size. Fixes: 1e53d5bb8878 ("net: Pass VLAN ID to rtnl_fdb_notify.") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a6529c55ffb7..2b9d7d08ed4d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2852,7 +2852,10 @@ nla_put_failure: static inline size_t rtnl_fdb_nlmsg_size(void) { - return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); + return NLMSG_ALIGN(sizeof(struct ndmsg)) + + nla_total_size(ETH_ALEN) + /* NDA_LLADDR */ + nla_total_size(sizeof(u16)) + /* NDA_VLAN */ + 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, -- cgit v1.2.3-59-g8ed1b From c88c545bf3202ca2cdb45df93eb40e3bcdbb3742 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 28 Oct 2016 10:12:40 -0700 Subject: sparc64: Add FORCE_MAX_ZONEORDER and default to 13 This change allows ATU (new IOMMU) in SPARC systems to request large (32M) contiguous memory during boot for creating IOTSB backing store. Signed-off-by: Dave Kleikamp Signed-off-by: Tushar Dave Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index b23c76b42d6e..5202eb4ba2db 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -89,6 +89,10 @@ config ARCH_DEFCONFIG config ARCH_PROC_KCORE_TEXT def_bool y +config ARCH_ATU + bool + default y if SPARC64 + config IOMMU_HELPER bool default y if SPARC64 @@ -304,6 +308,20 @@ config ARCH_SPARSEMEM_ENABLE config ARCH_SPARSEMEM_DEFAULT def_bool y if SPARC64 +config FORCE_MAX_ZONEORDER + int "Maximum zone order" + default "13" + help + The kernel memory allocator divides physically contiguous memory + blocks into "zones", where each zone is a power of two number of + pages. This option selects the largest power of two that the kernel + keeps in the memory allocator. If you need to allocate very large + blocks of physically contiguous memory, then you may need to + increase this value. + + This config option is actually maximum order plus one. For example, + a value of 13 means that the largest free memory block is 2^12 pages. + source "mm/Kconfig" if SPARC64 -- cgit v1.2.3-59-g8ed1b From f0248c1524fae654e9746e6843b9657fb3917387 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:41 -0700 Subject: sparc64: Add ATU (new IOMMU) support ATU (Address Translation Unit) is a new IOMMU in SPARC supported with Hypervisor IOMMU v2 APIs. Current SPARC IOMMU supports only 32bit address ranges and one TSB per PCIe root complex that has a 2GB per root complex DVMA space limit. The limit has become a scalability bottleneck nowadays that a typical 10G/40G NIC can consume 300MB-500MB DVMA space per instance. When DVMA resource is exhausted, devices will not be usable since the driver can't allocate DVMA. ATU removes bottleneck by allowing guest os to create IOTSB of size 32G (or more) with 64bit address ranges available in ATU HW. 32G is more than enough DVMA space to be shared by all PCIe devices under root complex contrast to 2G space provided by legacy IOMMU. ATU allows PCIe devices to use 64bit DMA addressing. Devices which choose to use 32bit DMA mask will continue to work with the existing legacy IOMMU. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/include/asm/hypervisor.h | 337 ++++++++++++++++++++++++++++++++++++ arch/sparc/include/asm/iommu_64.h | 26 +++ arch/sparc/kernel/hvapi.c | 1 + arch/sparc/kernel/pci_sun4v.c | 140 +++++++++++++++ arch/sparc/kernel/pci_sun4v.h | 7 + arch/sparc/kernel/pci_sun4v_asm.S | 18 ++ 6 files changed, 529 insertions(+) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 666d5ba230d2..7b15df8be008 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2335,6 +2335,342 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, */ #define HV_FAST_PCI_MSG_SETVALID 0xd3 +/* PCI IOMMU v2 definitions and services + * + * While the PCI IO definitions above is valid IOMMU v2 adds new PCI IO + * definitions and services. + * + * CTE Clump Table Entry. First level table entry in the ATU. + * + * pci_device_list + * A 32-bit aligned list of pci_devices. + * + * pci_device_listp + * real address of a pci_device_list. 32-bit aligned. + * + * iotte IOMMU translation table entry. + * + * iotte_attributes + * IO Attributes for IOMMU v2 mappings. In addition to + * read, write IOMMU v2 supports relax ordering + * + * io_page_list A 64-bit aligned list of real addresses. Each real + * address in an io_page_list must be properly aligned + * to the pagesize of the given IOTSB. + * + * io_page_list_p Real address of an io_page_list, 64-bit aligned. + * + * IOTSB IO Translation Storage Buffer. An aligned table of + * IOTTEs. Each IOTSB has a pagesize, table size, and + * virtual address associated with it that must match + * a pagesize and table size supported by the un-derlying + * hardware implementation. The alignment requirements + * for an IOTSB depend on the pagesize used for that IOTSB. + * Each IOTTE in an IOTSB maps one pagesize-sized page. + * The size of the IOTSB dictates how large of a virtual + * address space the IOTSB is capable of mapping. + * + * iotsb_handle An opaque identifier for an IOTSB. A devhandle plus + * iotsb_handle represents a binding of an IOTSB to a + * PCI root complex. + * + * iotsb_index Zero-based IOTTE number within an IOTSB. + */ + +/* pci_iotsb_conf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_CONF + * ARG0: devhandle + * ARG1: r_addr + * ARG2: size + * ARG3: pagesize + * ARG4: iova + * RET0: status + * RET1: iotsb_handle + * ERRORS: EINVAL Invalid devhandle, size, iova, or pagesize + * EBADALIGN r_addr is not properly aligned + * ENORADDR r_addr is not a valid real address + * ETOOMANY No further IOTSBs may be configured + * EBUSY Duplicate devhandle, raddir, iova combination + * + * Create an IOTSB suitable for the PCI root complex identified by devhandle, + * for the DMA virtual address defined by the argument iova. + * + * r_addr is the properly aligned base address of the IOTSB and size is the + * IOTSB (table) size in bytes.The IOTSB is required to be zeroed prior to + * being configured. If it contains any values other than zeros then the + * behavior is undefined. + * + * pagesize is the size of each page in the IOTSB. Note that the combination of + * size (table size) and pagesize must be valid. + * + * virt is the DMA virtual address this IOTSB will map. + * + * If successful, the opaque 64-bit handle iotsb_handle is returned in ret1. + * Once configured, privileged access to the IOTSB memory is prohibited and + * creates undefined behavior. The only permitted access is indirect via these + * services. + */ +#define HV_FAST_PCI_IOTSB_CONF 0x190 + +/* pci_iotsb_info() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_INFO + * ARG0: devhandle + * ARG1: iotsb_handle + * RET0: status + * RET1: r_addr + * RET2: size + * RET3: pagesize + * RET4: iova + * RET5: #bound + * ERRORS: EINVAL Invalid devhandle or iotsb_handle + * + * This service returns configuration information about an IOTSB previously + * created with pci_iotsb_conf. + * + * iotsb_handle value 0 may be used with this service to inquire about the + * legacy IOTSB that may or may not exist. If the service succeeds, the return + * values describe the legacy IOTSB and I/O virtual addresses mapped by that + * table. However, the table base address r_addr may contain the value -1 which + * indicates a memory range that cannot be accessed or be reclaimed. + * + * The return value #bound contains the number of PCI devices that iotsb_handle + * is currently bound to. + */ +#define HV_FAST_PCI_IOTSB_INFO 0x191 + +/* pci_iotsb_unconf() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_UNCONF + * ARG0: devhandle + * ARG1: iotsb_handle + * RET0: status + * ERRORS: EINVAL Invalid devhandle or iotsb_handle + * EBUSY The IOTSB is bound and may not be unconfigured + * + * This service unconfigures the IOTSB identified by the devhandle and + * iotsb_handle arguments, previously created with pci_iotsb_conf. + * The IOTSB must not be currently bound to any device or the service will fail + * + * If the call succeeds, iotsb_handle is no longer valid. + */ +#define HV_FAST_PCI_IOTSB_UNCONF 0x192 + +/* pci_iotsb_bind() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_BIND + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: pci_device + * RET0: status + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device + * EBUSY A PCI function is already bound to an IOTSB at the same + * address range as specified by devhandle, iotsb_handle. + * + * This service binds the PCI function specified by the argument pci_device to + * the IOTSB specified by the arguments devhandle and iotsb_handle. + * + * The PCI device function is bound to the specified IOTSB with the IOVA range + * specified when the IOTSB was configured via pci_iotsb_conf. If the function + * is already bound then it is unbound first. + */ +#define HV_FAST_PCI_IOTSB_BIND 0x193 + +/* pci_iotsb_unbind() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_UNBIND + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: pci_device + * RET0: status + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or pci_device + * ENOMAP The PCI function was not bound to the specified IOTSB + * + * This service unbinds the PCI device specified by the argument pci_device + * from the IOTSB identified * by the arguments devhandle and iotsb_handle. + * + * If the PCI device is not bound to the specified IOTSB then this service will + * fail with status ENOMAP + */ +#define HV_FAST_PCI_IOTSB_UNBIND 0x194 + +/* pci_iotsb_get_binding() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_GET_BINDING + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iova + * RET0: status + * RET1: iotsb_handle + * ERRORS: EINVAL Invalid devhandle, pci_device, or iova + * ENOMAP The PCI function is not bound to an IOTSB at iova + * + * This service returns the IOTSB binding, iotsb_handle, for a given pci_device + * and DMA virtual address, iova. + * + * iova must be the base address of a DMA virtual address range as defined by + * the iommu-address-ranges property in the root complex device node defined + * by the argument devhandle. + */ +#define HV_FAST_PCI_IOTSB_GET_BINDING 0x195 + +/* pci_iotsb_map() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_MAP + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: index_count + * ARG3: iotte_attributes + * ARG4: io_page_list_p + * RET0: status + * RET1: #mapped + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, #iottes, + * iotsb_index or iotte_attributes + * EBADALIGN Improperly aligned io_page_list_p or I/O page + * address in the I/O page list. + * ENORADDR Invalid io_page_list_p or I/O page address in + * the I/O page list. + * + * This service creates and flushes mappings in the IOTSB defined by the + * arguments devhandle, iotsb. + * + * The index_count argument consists of two fields. Bits 63:48 contain #iotte + * and bits 47:0 contain iotsb_index + * + * The first mapping is created in the IOTSB index specified by iotsb_index. + * Subsequent mappings are created at iotsb_index+1 and so on. + * + * The attributes of each mapping are defined by the argument iotte_attributes. + * + * The io_page_list_p specifies the real address of the 64-bit-aligned list of + * #iottes I/O page addresses. Each page address must be a properly aligned + * real address of a page to be mapped in the IOTSB. The first entry in the I/O + * page list contains the real address of the first page, the 2nd entry for the + * 2nd page, and so on. + * + * #iottes must be greater than zero. + * + * The return value #mapped is the actual number of mappings created, which may + * be less than or equal to the argument #iottes. If the function returns + * successfully with a #mapped value less than the requested #iottes then the + * caller should continue to invoke the service with updated iotsb_index, + * #iottes, and io_page_list_p arguments until all pages are mapped. + * + * This service must not be used to demap a mapping. In other words, all + * mappings must be valid and have one or both of the RW attribute bits set. + * + * Note: + * It is implementation-defined whether I/O page real address validity checking + * is done at time mappings are established or deferred until they are + * accessed. + */ +#define HV_FAST_PCI_IOTSB_MAP 0x196 + +/* pci_iotsb_map_one() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_MAP_ONE + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * ARG3: iotte_attributes + * ARG4: r_addr + * RET0: status + * ERRORS: EINVAL Invalid devhandle,iotsb_handle, iotsb_index + * or iotte_attributes + * EBADALIGN Improperly aligned r_addr + * ENORADDR Invalid r_addr + * + * This service creates and flushes a single mapping in the IOTSB defined by the + * arguments devhandle, iotsb. + * + * The mapping for the page at r_addr is created at the IOTSB index specified by + * iotsb_index with the attributes iotte_attributes. + * + * This service must not be used to demap a mapping. In other words, the mapping + * must be valid and have one or both of the RW attribute bits set. + * + * Note: + * It is implementation-defined whether I/O page real address validity checking + * is done at time mappings are established or deferred until they are + * accessed. + */ +#define HV_FAST_PCI_IOTSB_MAP_ONE 0x197 + +/* pci_iotsb_demap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_DEMAP + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * ARG3: #iottes + * RET0: status + * RET1: #unmapped + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index or #iottes + * + * This service unmaps and flushes up to #iottes mappings starting at index + * iotsb_index from the IOTSB defined by the arguments devhandle, iotsb. + * + * #iottes must be greater than zero. + * + * The actual number of IOTTEs unmapped is returned in #unmapped and may be less + * than or equal to the requested number of IOTTEs, #iottes. + * + * If #unmapped is less than #iottes, the caller should continue to invoke this + * service with updated iotsb_index and #iottes arguments until all pages are + * demapped. + */ +#define HV_FAST_PCI_IOTSB_DEMAP 0x198 + +/* pci_iotsb_getmap() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_GETMAP + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * RET0: status + * RET1: r_addr + * RET2: iotte_attributes + * ERRORS: EINVAL Invalid devhandle, iotsb_handle, or iotsb_index + * ENOMAP No mapping was found + * + * This service returns the mapping specified by index iotsb_index from the + * IOTSB defined by the arguments devhandle, iotsb. + * + * Upon success, the real address of the mapping shall be returned in + * r_addr and thethe IOTTE mapping attributes shall be returned in + * iotte_attributes. + * + * The return value iotte_attributes may not include optional features used in + * the call to create the mapping. + */ +#define HV_FAST_PCI_IOTSB_GETMAP 0x199 + +/* pci_iotsb_sync_mappings() + * TRAP: HV_FAST_TRAP + * FUNCTION: HV_FAST_PCI_IOTSB_SYNC_MAPPINGS + * ARG0: devhandle + * ARG1: iotsb_handle + * ARG2: iotsb_index + * ARG3: #iottes + * RET0: status + * RET1: #synced + * ERROS: EINVAL Invalid devhandle, iotsb_handle, iotsb_index, or #iottes + * + * This service synchronizes #iottes mappings starting at index iotsb_index in + * the IOTSB defined by the arguments devhandle, iotsb. + * + * #iottes must be greater than zero. + * + * The actual number of IOTTEs synchronized is returned in #synced, which may + * be less than or equal to the requested number, #iottes. + * + * Upon a successful return, #synced is less than #iottes, the caller should + * continue to invoke this service with updated iotsb_index and #iottes + * arguments until all pages are synchronized. + */ +#define HV_FAST_PCI_IOTSB_SYNC_MAPPINGS 0x19a + /* Logical Domain Channel services. */ #define LDC_CHANNEL_DOWN 0 @@ -2993,6 +3329,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num, #define HV_GRP_SDIO 0x0108 #define HV_GRP_SDIO_ERR 0x0109 #define HV_GRP_REBOOT_DATA 0x0110 +#define HV_GRP_ATU 0x0111 #define HV_GRP_M7_PERF 0x0114 #define HV_GRP_NIAG_PERF 0x0200 #define HV_GRP_FIRE_PERF 0x0201 diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index cd0d69fa7592..93daa5965b3d 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -24,8 +24,34 @@ struct iommu_arena { unsigned int limit; }; +#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */ + +/* Data structures for SPARC ATU architecture */ +struct atu_iotsb { + void *table; /* IOTSB table base virtual addr*/ + u64 ra; /* IOTSB table real addr */ + u64 dvma_size; /* ranges[3].size or OS slected 32G size */ + u64 dvma_base; /* ranges[3].base */ + u64 table_size; /* IOTSB table size */ + u64 page_size; /* IO PAGE size for IOTSB */ + u32 iotsb_num; /* tsbnum is same as iotsb_handle */ +}; + +struct atu_ranges { + u64 base; + u64 size; +}; + +struct atu { + struct atu_ranges *ranges; + struct atu_iotsb *iotsb; + u64 base; + u64 size; +}; + struct iommu { struct iommu_map_table tbl; + struct atu *atu; spinlock_t lock; u32 dma_addr_mask; iopte_t *page_table; diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 662500fa555f..267731234ce8 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -39,6 +39,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_SDIO, }, { .group = HV_GRP_SDIO_ERR, }, { .group = HV_GRP_REBOOT_DATA, }, + { .group = HV_GRP_ATU, .flags = FLAG_PRE_API }, { .group = HV_GRP_NIAG_PERF, .flags = FLAG_PRE_API }, { .group = HV_GRP_FIRE_PERF, }, { .group = HV_GRP_N2_CPU, }, diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index db57d8acdc01..2afb86c73da9 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = { { .major = 1, .minor = 1 }, }; +static unsigned long vatu_major = 1; +static unsigned long vatu_minor = 1; + #define PGLIST_NENTS (PAGE_SIZE / sizeof(u64)) struct iommu_batch { @@ -581,6 +584,107 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm, return cnt; } +static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm) +{ + struct atu *atu = pbm->iommu->atu; + struct atu_iotsb *iotsb; + void *table; + u64 table_size; + u64 iotsb_num; + unsigned long order; + unsigned long err; + + iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL); + if (!iotsb) { + err = -ENOMEM; + goto out_err; + } + atu->iotsb = iotsb; + + /* calculate size of IOTSB */ + table_size = (atu->size / IO_PAGE_SIZE) * 8; + order = get_order(table_size); + table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!table) { + err = -ENOMEM; + goto table_failed; + } + iotsb->table = table; + iotsb->ra = __pa(table); + iotsb->dvma_size = atu->size; + iotsb->dvma_base = atu->base; + iotsb->table_size = table_size; + iotsb->page_size = IO_PAGE_SIZE; + + /* configure and register IOTSB with HV */ + err = pci_sun4v_iotsb_conf(pbm->devhandle, + iotsb->ra, + iotsb->table_size, + iotsb->page_size, + iotsb->dvma_base, + &iotsb_num); + if (err) { + pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err); + goto iotsb_conf_failed; + } + iotsb->iotsb_num = iotsb_num; + + return 0; + +iotsb_conf_failed: + free_pages((unsigned long)table, order); +table_failed: + kfree(iotsb); +out_err: + return err; +} + +static int pci_sun4v_atu_init(struct pci_pbm_info *pbm) +{ + struct atu *atu = pbm->iommu->atu; + unsigned long err; + const u64 *ranges; + const u32 *page_size; + int len; + + ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges", + &len); + if (!ranges) { + pr_err(PFX "No iommu-address-ranges\n"); + return -EINVAL; + } + + page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes", + NULL); + if (!page_size) { + pr_err(PFX "No iommu-pagesizes\n"); + return -EINVAL; + } + + /* There are 4 iommu-address-ranges supported. Each range is pair of + * {base, size}. The ranges[0] and ranges[1] are 32bit address space + * while ranges[2] and ranges[3] are 64bit space. We want to use 64bit + * address ranges to support 64bit addressing. Because 'size' for + * address ranges[2] and ranges[3] are same we can select either of + * ranges[2] or ranges[3] for mapping. However due to 'size' is too + * large for OS to allocate IOTSB we are using fix size 32G + * (ATU_64_SPACE_SIZE) which is more than enough for all PCIe devices + * to share. + */ + atu->ranges = (struct atu_ranges *)ranges; + atu->base = atu->ranges[3].base; + atu->size = ATU_64_SPACE_SIZE; + + /* Create IOTSB */ + err = pci_sun4v_atu_alloc_iotsb(pbm); + if (err) { + pr_err(PFX "Error creating ATU IOTSB\n"); + return err; + } + + return 0; +} + static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm) { static const u32 vdma_default[] = { 0x80000000, 0x80000000 }; @@ -918,6 +1022,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm, pci_sun4v_scan_bus(pbm, &op->dev); + /* if atu_init fails its not complete failure. + * we can still continue using legacy iommu. + */ + if (pbm->iommu->atu) { + err = pci_sun4v_atu_init(pbm); + if (err) { + kfree(pbm->iommu->atu); + pbm->iommu->atu = NULL; + pr_err(PFX "ATU init failed, err=%d\n", err); + } + } + pbm->next = pci_pbm_root; pci_pbm_root = pbm; @@ -931,8 +1047,10 @@ static int pci_sun4v_probe(struct platform_device *op) struct pci_pbm_info *pbm; struct device_node *dp; struct iommu *iommu; + struct atu *atu; u32 devhandle; int i, err = -ENODEV; + static bool hv_atu = true; dp = op->dev.of_node; @@ -954,6 +1072,19 @@ static int pci_sun4v_probe(struct platform_device *op) pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n", vpci_major, vpci_minor); + err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor); + if (err) { + /* don't return an error if we fail to register the + * ATU group, but ATU hcalls won't be available. + */ + hv_atu = false; + pr_err(PFX "Could not register hvapi ATU err=%d\n", + err); + } else { + pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n", + vatu_major, vatu_minor); + } + dma_ops = &sun4v_dma_ops; } @@ -991,6 +1122,14 @@ static int pci_sun4v_probe(struct platform_device *op) } pbm->iommu = iommu; + iommu->atu = NULL; + if (hv_atu) { + atu = kzalloc(sizeof(*atu), GFP_KERNEL); + if (!atu) + pr_err(PFX "Could not allocate atu\n"); + else + iommu->atu = atu; + } err = pci_sun4v_pbm_init(pbm, op, devhandle); if (err) @@ -1001,6 +1140,7 @@ static int pci_sun4v_probe(struct platform_device *op) return 0; out_free_iommu: + kfree(iommu->atu); kfree(pbm->iommu); out_free_controller: diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h index 5642212390b2..0ef6d1c456e7 100644 --- a/arch/sparc/kernel/pci_sun4v.h +++ b/arch/sparc/kernel/pci_sun4v.h @@ -89,4 +89,11 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle, unsigned long msinum, unsigned long valid); +/* Sun4v HV IOMMU v2 APIs */ +unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle, + unsigned long ra, + unsigned long table_size, + unsigned long page_size, + unsigned long dvma_base, + u64 *iotsb_num); #endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S index e606d46c6815..fd94d0e4a41d 100644 --- a/arch/sparc/kernel/pci_sun4v_asm.S +++ b/arch/sparc/kernel/pci_sun4v_asm.S @@ -360,3 +360,21 @@ ENTRY(pci_sun4v_msg_setvalid) mov %o0, %o0 ENDPROC(pci_sun4v_msg_setvalid) + /* + * %o0: devhandle + * %o1: r_addr + * %o2: size + * %o3: pagesize + * %o4: virt + * %o5: &iotsb_num/&iotsb_handle + * + * returns %o0: status + * %o1: iotsb_num/iotsb_handle + */ +ENTRY(pci_sun4v_iotsb_conf) + mov %o5, %g1 + mov HV_FAST_PCI_IOTSB_CONF, %o5 + ta HV_FAST_TRAP + retl + stx %o1, [%g1] +ENDPROC(pci_sun4v_iotsb_conf) -- cgit v1.2.3-59-g8ed1b From 31f077dc7dffd4a444932a9fe7fe84d9c7b90b73 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:42 -0700 Subject: sparc64: Initialize iommu_map_table and iommu_pool Like legacy IOMMU, use common iommu_map_table and iommu_pool for ATU. This change initializes iommu_map_table and iommu_pool for ATU. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Reviewed-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/include/asm/iommu_64.h | 2 ++ arch/sparc/kernel/pci_sun4v.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index 93daa5965b3d..f24f356f2503 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -45,8 +45,10 @@ struct atu_ranges { struct atu { struct atu_ranges *ranges; struct atu_iotsb *iotsb; + struct iommu_map_table tbl; u64 base; u64 size; + u64 dma_addr_mask; }; struct iommu { diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 2afb86c73da9..242477cbfdf2 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -644,6 +644,8 @@ static int pci_sun4v_atu_init(struct pci_pbm_info *pbm) struct atu *atu = pbm->iommu->atu; unsigned long err; const u64 *ranges; + u64 map_size, num_iotte; + u64 dma_mask; const u32 *page_size; int len; @@ -682,6 +684,23 @@ static int pci_sun4v_atu_init(struct pci_pbm_info *pbm) return err; } + /* Create ATU iommu map. + * One bit represents one iotte in IOTSB table. + */ + dma_mask = (roundup_pow_of_two(atu->size) - 1UL); + num_iotte = atu->size / IO_PAGE_SIZE; + map_size = num_iotte / 8; + atu->tbl.table_map_base = atu->base; + atu->dma_addr_mask = dma_mask; + atu->tbl.map = kzalloc(map_size, GFP_KERNEL); + if (!atu->tbl.map) + return -ENOMEM; + + iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT, + NULL, false /* no large_pool */, + 0 /* default npools */, + false /* want span boundary checking */); + return 0; } -- cgit v1.2.3-59-g8ed1b From 5116ab4eabed575b7cca61a6e89b7d6fb7440970 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:43 -0700 Subject: sparc64: Bind PCIe devices to use IOMMU v2 service In order to use Hypervisor (HV) IOMMU v2 API for map/demap, each PCIe device has to be bound to IOTSB using HV API pci_iotsb_bind(). Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/kernel/pci_sun4v.c | 43 +++++++++++++++++++++++++++++++++++++++ arch/sparc/kernel/pci_sun4v.h | 3 +++ arch/sparc/kernel/pci_sun4v_asm.S | 14 +++++++++++++ 3 files changed, 60 insertions(+) diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 242477cbfdf2..d4208aa93383 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -216,6 +216,43 @@ range_alloc_fail: return NULL; } +unsigned long dma_4v_iotsb_bind(unsigned long devhandle, + unsigned long iotsb_num, + struct pci_bus *bus_dev) +{ + struct pci_dev *pdev; + unsigned long err; + unsigned int bus; + unsigned int device; + unsigned int fun; + + list_for_each_entry(pdev, &bus_dev->devices, bus_list) { + if (pdev->subordinate) { + /* No need to bind pci bridge */ + dma_4v_iotsb_bind(devhandle, iotsb_num, + pdev->subordinate); + } else { + bus = bus_dev->number; + device = PCI_SLOT(pdev->devfn); + fun = PCI_FUNC(pdev->devfn); + err = pci_sun4v_iotsb_bind(devhandle, iotsb_num, + HV_PCI_DEVICE_BUILD(bus, + device, + fun)); + + /* If bind fails for one device it is going to fail + * for rest of the devices because we are sharing + * IOTSB. So in case of failure simply return with + * error. + */ + if (err) + return err; + } + } + + return 0; +} + static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, unsigned long npages) { @@ -629,6 +666,12 @@ static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm) } iotsb->iotsb_num = iotsb_num; + err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus); + if (err) { + pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err); + goto iotsb_conf_failed; + } + return 0; iotsb_conf_failed: diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h index 0ef6d1c456e7..1019e0fe6e9d 100644 --- a/arch/sparc/kernel/pci_sun4v.h +++ b/arch/sparc/kernel/pci_sun4v.h @@ -96,4 +96,7 @@ unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle, unsigned long page_size, unsigned long dvma_base, u64 *iotsb_num); +unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle, + unsigned long iotsb_num, + unsigned int pci_device); #endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S index fd94d0e4a41d..22024a96c317 100644 --- a/arch/sparc/kernel/pci_sun4v_asm.S +++ b/arch/sparc/kernel/pci_sun4v_asm.S @@ -378,3 +378,17 @@ ENTRY(pci_sun4v_iotsb_conf) retl stx %o1, [%g1] ENDPROC(pci_sun4v_iotsb_conf) + + /* + * %o0: devhandle + * %o1: iotsb_num/iotsb_handle + * %o2: pci_device + * + * returns %o0: status + */ +ENTRY(pci_sun4v_iotsb_bind) + mov HV_FAST_PCI_IOTSB_BIND, %o5 + ta HV_FAST_TRAP + retl + nop +ENDPROC(pci_sun4v_iotsb_bind) -- cgit v1.2.3-59-g8ed1b From f08978b0fdbf37d3c91efb60a20bdee3ba8f59c6 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:44 -0700 Subject: sparc64: Enable sun4v dma ops to use IOMMU v2 APIs Add Hypervisor IOMMU v2 APIs pci_iotsb_map(), pci_iotsb_demap() and enable sun4v dma ops to use IOMMU v2 API for all PCIe devices with 64bit DMA mask. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/include/asm/hypervisor.h | 6 + arch/sparc/kernel/pci_sun4v.c | 216 ++++++++++++++++++++++++++---------- arch/sparc/kernel/pci_sun4v.h | 11 ++ arch/sparc/kernel/pci_sun4v_asm.S | 36 ++++++ 4 files changed, 211 insertions(+), 58 deletions(-) diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h index 7b15df8be008..73cb8978df58 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2377,6 +2377,12 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle, * iotsb_index Zero-based IOTTE number within an IOTSB. */ +/* The index_count argument consists of two fields: + * bits 63:48 #iottes and bits 47:0 iotsb_index + */ +#define HV_PCI_IOTSB_INDEX_COUNT(__iottes, __iotsb_index) \ + (((u64)(__iottes) << 48UL) | ((u64)(__iotsb_index))) + /* pci_iotsb_conf() * TRAP: HV_FAST_TRAP * FUNCTION: HV_FAST_PCI_IOTSB_CONF diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d4208aa93383..06981cc716b6 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -72,34 +72,57 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns } /* Interrupts must be disabled. */ -static long iommu_batch_flush(struct iommu_batch *p) +static long iommu_batch_flush(struct iommu_batch *p, u64 mask) { struct pci_pbm_info *pbm = p->dev->archdata.host_controller; + u64 *pglist = p->pglist; + u64 index_count; unsigned long devhandle = pbm->devhandle; unsigned long prot = p->prot; unsigned long entry = p->entry; - u64 *pglist = p->pglist; unsigned long npages = p->npages; + unsigned long iotsb_num; + unsigned long ret; + long num; /* VPCI maj=1, min=[0,1] only supports read and write */ if (vpci_major < 2) prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE); while (npages != 0) { - long num; - - num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry), - npages, prot, __pa(pglist)); - if (unlikely(num < 0)) { - if (printk_ratelimit()) - printk("iommu_batch_flush: IOMMU map of " - "[%08lx:%08llx:%lx:%lx:%lx] failed with " - "status %ld\n", - devhandle, HV_PCI_TSBID(0, entry), - npages, prot, __pa(pglist), num); - return -1; + if (mask <= DMA_BIT_MASK(32)) { + num = pci_sun4v_iommu_map(devhandle, + HV_PCI_TSBID(0, entry), + npages, + prot, + __pa(pglist)); + if (unlikely(num < 0)) { + pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n", + __func__, + devhandle, + HV_PCI_TSBID(0, entry), + npages, prot, __pa(pglist), + num); + return -1; + } + } else { + index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry), + iotsb_num = pbm->iommu->atu->iotsb->iotsb_num; + ret = pci_sun4v_iotsb_map(devhandle, + iotsb_num, + index_count, + prot, + __pa(pglist), + &num); + if (unlikely(ret != HV_EOK)) { + pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n", + __func__, + devhandle, iotsb_num, + index_count, prot, + __pa(pglist), ret); + return -1; + } } - entry += num; npages -= num; pglist += num; @@ -111,19 +134,19 @@ static long iommu_batch_flush(struct iommu_batch *p) return 0; } -static inline void iommu_batch_new_entry(unsigned long entry) +static inline void iommu_batch_new_entry(unsigned long entry, u64 mask) { struct iommu_batch *p = this_cpu_ptr(&iommu_batch); if (p->entry + p->npages == entry) return; if (p->entry != ~0UL) - iommu_batch_flush(p); + iommu_batch_flush(p, mask); p->entry = entry; } /* Interrupts must be disabled. */ -static inline long iommu_batch_add(u64 phys_page) +static inline long iommu_batch_add(u64 phys_page, u64 mask) { struct iommu_batch *p = this_cpu_ptr(&iommu_batch); @@ -131,28 +154,31 @@ static inline long iommu_batch_add(u64 phys_page) p->pglist[p->npages++] = phys_page; if (p->npages == PGLIST_NENTS) - return iommu_batch_flush(p); + return iommu_batch_flush(p, mask); return 0; } /* Interrupts must be disabled. */ -static inline long iommu_batch_end(void) +static inline long iommu_batch_end(u64 mask) { struct iommu_batch *p = this_cpu_ptr(&iommu_batch); BUG_ON(p->npages >= PGLIST_NENTS); - return iommu_batch_flush(p); + return iommu_batch_flush(p, mask); } static void *dma_4v_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addrp, gfp_t gfp, unsigned long attrs) { + u64 mask; unsigned long flags, order, first_page, npages, n; unsigned long prot = 0; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; struct page *page; void *ret; long entry; @@ -177,14 +203,21 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, memset((char *)first_page, 0, PAGE_SIZE << order); iommu = dev->archdata.iommu; + atu = iommu->atu; + + mask = dev->coherent_dma_mask; + if (mask <= DMA_BIT_MASK(32)) + tbl = &iommu->tbl; + else + tbl = &atu->tbl; - entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, (unsigned long)(-1), 0); if (unlikely(entry == IOMMU_ERROR_CODE)) goto range_alloc_fail; - *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); + *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT)); ret = (void *) first_page; first_page = __pa(first_page); @@ -196,12 +229,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, entry); for (n = 0; n < npages; n++) { - long err = iommu_batch_add(first_page + (n * PAGE_SIZE)); + long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask); if (unlikely(err < 0L)) goto iommu_map_fail; } - if (unlikely(iommu_batch_end() < 0L)) + if (unlikely(iommu_batch_end(mask) < 0L)) goto iommu_map_fail; local_irq_restore(flags); @@ -209,7 +242,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, return ret; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE); + iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE); range_alloc_fail: free_pages(first_page, order); @@ -253,18 +286,27 @@ unsigned long dma_4v_iotsb_bind(unsigned long devhandle, return 0; } -static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry, - unsigned long npages) +static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle, + dma_addr_t dvma, unsigned long iotsb_num, + unsigned long entry, unsigned long npages) { - u32 devhandle = *(u32 *)demap_arg; unsigned long num, flags; + unsigned long ret; local_irq_save(flags); do { - num = pci_sun4v_iommu_demap(devhandle, - HV_PCI_TSBID(0, entry), - npages); - + if (dvma <= DMA_BIT_MASK(32)) { + num = pci_sun4v_iommu_demap(devhandle, + HV_PCI_TSBID(0, entry), + npages); + } else { + ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num, + entry, npages, &num); + if (unlikely(ret != HV_EOK)) { + pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n", + ret); + } + } entry += num; npages -= num; } while (npages != 0); @@ -276,16 +318,28 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, { struct pci_pbm_info *pbm; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; unsigned long order, npages, entry; + unsigned long iotsb_num; u32 devhandle; npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; + atu = iommu->atu; devhandle = pbm->devhandle; - entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); - dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); + + if (dvma <= DMA_BIT_MASK(32)) { + tbl = &iommu->tbl; + iotsb_num = 0; /* we don't care for legacy iommu */ + } else { + tbl = &atu->tbl; + iotsb_num = atu->iotsb->iotsb_num; + } + entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT); + dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages); + iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) free_pages((unsigned long)cpu, order); @@ -297,13 +351,17 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, unsigned long attrs) { struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; + u64 mask; unsigned long flags, npages, oaddr; unsigned long i, base_paddr; - u32 bus_addr, ret; unsigned long prot; + dma_addr_t bus_addr, ret; long entry; iommu = dev->archdata.iommu; + atu = iommu->atu; if (unlikely(direction == DMA_NONE)) goto bad; @@ -312,13 +370,19 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; - entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, + mask = *dev->dma_mask; + if (mask <= DMA_BIT_MASK(32)) + tbl = &iommu->tbl; + else + tbl = &atu->tbl; + + entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL, (unsigned long)(-1), 0); if (unlikely(entry == IOMMU_ERROR_CODE)) goto bad; - bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); + bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT)); ret = bus_addr | (oaddr & ~IO_PAGE_MASK); base_paddr = __pa(oaddr & IO_PAGE_MASK); prot = HV_PCI_MAP_ATTR_READ; @@ -333,11 +397,11 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, iommu_batch_start(dev, prot, entry); for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) { - long err = iommu_batch_add(base_paddr); + long err = iommu_batch_add(base_paddr, mask); if (unlikely(err < 0L)) goto iommu_map_fail; } - if (unlikely(iommu_batch_end() < 0L)) + if (unlikely(iommu_batch_end(mask) < 0L)) goto iommu_map_fail; local_irq_restore(flags); @@ -350,7 +414,7 @@ bad: return DMA_ERROR_CODE; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); + iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE); return DMA_ERROR_CODE; } @@ -360,7 +424,10 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, { struct pci_pbm_info *pbm; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; unsigned long npages; + unsigned long iotsb_num; long entry; u32 devhandle; @@ -372,14 +439,23 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; + atu = iommu->atu; devhandle = pbm->devhandle; npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK); npages >>= IO_PAGE_SHIFT; bus_addr &= IO_PAGE_MASK; - entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; - dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); + + if (bus_addr <= DMA_BIT_MASK(32)) { + iotsb_num = 0; /* we don't care for legacy iommu */ + tbl = &iommu->tbl; + } else { + iotsb_num = atu->iotsb->iotsb_num; + tbl = &atu->tbl; + } + entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT; + dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages); + iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, @@ -393,12 +469,17 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, unsigned long seg_boundary_size; int outcount, incount, i; struct iommu *iommu; + struct atu *atu; + struct iommu_map_table *tbl; + u64 mask; unsigned long base_shift; long err; BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; + atu = iommu->atu; + if (nelems == 0 || !iommu) return 0; @@ -424,7 +505,15 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, max_seg_size = dma_get_max_seg_size(dev); seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, IO_PAGE_SIZE) >> IO_PAGE_SHIFT; - base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; + + mask = *dev->dma_mask; + if (mask <= DMA_BIT_MASK(32)) + tbl = &iommu->tbl; + else + tbl = &atu->tbl; + + base_shift = tbl->table_map_base >> IO_PAGE_SHIFT; + for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; @@ -437,27 +526,26 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, /* Allocate iommu entries for that segment */ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE); - entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, + entry = iommu_tbl_range_alloc(dev, tbl, npages, &handle, (unsigned long)(-1), 0); /* Handle failure */ if (unlikely(entry == IOMMU_ERROR_CODE)) { - if (printk_ratelimit()) - printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" - " npages %lx\n", iommu, paddr, npages); + pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n", + tbl, paddr, npages); goto iommu_map_failed; } - iommu_batch_new_entry(entry); + iommu_batch_new_entry(entry, mask); /* Convert entry to a dma_addr_t */ - dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT); + dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT); dma_addr |= (s->offset & ~IO_PAGE_MASK); /* Insert into HW table */ paddr &= IO_PAGE_MASK; while (npages--) { - err = iommu_batch_add(paddr); + err = iommu_batch_add(paddr, mask); if (unlikely(err < 0L)) goto iommu_map_failed; paddr += IO_PAGE_SIZE; @@ -492,7 +580,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, dma_next = dma_addr + slen; } - err = iommu_batch_end(); + err = iommu_batch_end(mask); if (unlikely(err < 0L)) goto iommu_map_failed; @@ -515,7 +603,7 @@ iommu_map_failed: vaddr = s->dma_address & IO_PAGE_MASK; npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); - iommu_tbl_range_free(&iommu->tbl, vaddr, npages, + iommu_tbl_range_free(tbl, vaddr, npages, IOMMU_ERROR_CODE); /* XXX demap? XXX */ s->dma_address = DMA_ERROR_CODE; @@ -536,13 +624,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, struct pci_pbm_info *pbm; struct scatterlist *sg; struct iommu *iommu; + struct atu *atu; unsigned long flags, entry; + unsigned long iotsb_num; u32 devhandle; BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; + atu = iommu->atu; devhandle = pbm->devhandle; local_irq_save(flags); @@ -552,15 +643,24 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, dma_addr_t dma_handle = sg->dma_address; unsigned int len = sg->dma_length; unsigned long npages; - struct iommu_map_table *tbl = &iommu->tbl; + struct iommu_map_table *tbl; unsigned long shift = IO_PAGE_SHIFT; if (!len) break; npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE); + + if (dma_handle <= DMA_BIT_MASK(32)) { + iotsb_num = 0; /* we don't care for legacy iommu */ + tbl = &iommu->tbl; + } else { + iotsb_num = atu->iotsb->iotsb_num; + tbl = &atu->tbl; + } entry = ((dma_handle - tbl->table_map_base) >> shift); - dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, + dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num, + entry, npages); + iommu_tbl_range_free(tbl, dma_handle, npages, IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h index 1019e0fe6e9d..22603a4e48bf 100644 --- a/arch/sparc/kernel/pci_sun4v.h +++ b/arch/sparc/kernel/pci_sun4v.h @@ -99,4 +99,15 @@ unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle, unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle, unsigned long iotsb_num, unsigned int pci_device); +unsigned long pci_sun4v_iotsb_map(unsigned long devhandle, + unsigned long iotsb_num, + unsigned long iotsb_index_iottes, + unsigned long io_attributes, + unsigned long io_page_list_pa, + long *mapped); +unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle, + unsigned long iotsb_num, + unsigned long iotsb_index, + unsigned long iottes, + unsigned long *demapped); #endif /* !(_PCI_SUN4V_H) */ diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S index 22024a96c317..578f09657916 100644 --- a/arch/sparc/kernel/pci_sun4v_asm.S +++ b/arch/sparc/kernel/pci_sun4v_asm.S @@ -392,3 +392,39 @@ ENTRY(pci_sun4v_iotsb_bind) retl nop ENDPROC(pci_sun4v_iotsb_bind) + + /* + * %o0: devhandle + * %o1: iotsb_num/iotsb_handle + * %o2: index_count + * %o3: iotte_attributes + * %o4: io_page_list_p + * %o5: &mapped + * + * returns %o0: status + * %o1: #mapped + */ +ENTRY(pci_sun4v_iotsb_map) + mov %o5, %g1 + mov HV_FAST_PCI_IOTSB_MAP, %o5 + ta HV_FAST_TRAP + retl + stx %o1, [%g1] +ENDPROC(pci_sun4v_iotsb_map) + + /* + * %o0: devhandle + * %o1: iotsb_num/iotsb_handle + * %o2: iotsb_index + * %o3: #iottes + * %o4: &demapped + * + * returns %o0: status + * %o1: #demapped + */ +ENTRY(pci_sun4v_iotsb_demap) + mov HV_FAST_PCI_IOTSB_DEMAP, %o5 + ta HV_FAST_TRAP + retl + stx %o1, [%o4] +ENDPROC(pci_sun4v_iotsb_demap) -- cgit v1.2.3-59-g8ed1b From d30a6b84df00128e03588564925dc828a53e6865 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Fri, 28 Oct 2016 10:12:45 -0700 Subject: sparc64: Enable 64-bit DMA ATU 64bit addressing allows PCIe devices with 64bit DMA capabilities to use ATU for 64bit DMA. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Acked-by: Sowmini Varadhan Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 4 ++++ arch/sparc/kernel/iommu.c | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 5202eb4ba2db..60145c9b9f84 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -93,6 +93,10 @@ config ARCH_ATU bool default y if SPARC64 +config ARCH_DMA_ADDR_T_64BIT + bool + default y if ARCH_ATU + config IOMMU_HELPER bool default y if SPARC64 diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 5c615abff030..852a3291db96 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask) struct iommu *iommu = dev->archdata.iommu; u64 dma_addr_mask = iommu->dma_addr_mask; - if (device_mask >= (1UL << 32UL)) - return 0; + if (device_mask > DMA_BIT_MASK(32)) { + if (iommu->atu) + dma_addr_mask = iommu->atu->dma_addr_mask; + else + return 0; + } if ((device_mask & dma_addr_mask) == dma_addr_mask) return 1; -- cgit v1.2.3-59-g8ed1b From 266439c94df9e6aee3390c6e1cfdb645e566f704 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Mon, 17 Oct 2016 13:56:59 -0700 Subject: sunqe: Fix compiler warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sunqe uses '__u32' for dma handle while invoking kernel DMA APIs, instead of using dma_addr_t. This hasn't caused any 'incompatible pointer type' warning on SPARC because until now dma_addr_t is of type u32. However, recent changes in SPARC ATU (iommu) enables 64bit DMA and therefore dma_addr_t becomes of type u64. This makes 'incompatible pointer type' warnings inevitable. e.g. drivers/net/ethernet/sun/sunqe.c: In function ‘qec_ether_init’: drivers/net/ethernet/sun/sunqe.c:883: warning: passing argument 3 of ‘dma_alloc_coherent’ from incompatible pointer type ./include/linux/dma-mapping.h:445: note: expected ‘dma_addr_t *’ but argument is of type ‘__u32 *’ drivers/net/ethernet/sun/sunqe.c:885: warning: passing argument 3 of ‘dma_alloc_coherent’ from incompatible pointer type ./include/linux/dma-mapping.h:445: note: expected ‘dma_addr_t *’ but argument is of type ‘__u32 *’ This patch resolves above compiler warnings. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunqe.c | 11 ++++++----- drivers/net/ethernet/sun/sunqe.h | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index 9b825780b3be..9582948145c1 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -124,7 +124,7 @@ static void qe_init_rings(struct sunqe *qep) { struct qe_init_block *qb = qep->qe_block; struct sunqe_buffers *qbufs = qep->buffers; - __u32 qbufs_dvma = qep->buffers_dvma; + __u32 qbufs_dvma = (__u32)qep->buffers_dvma; int i; qep->rx_new = qep->rx_old = qep->tx_new = qep->tx_old = 0; @@ -144,6 +144,7 @@ static int qe_init(struct sunqe *qep, int from_irq) void __iomem *mregs = qep->mregs; void __iomem *gregs = qecp->gregs; unsigned char *e = &qep->dev->dev_addr[0]; + __u32 qblk_dvma = (__u32)qep->qblock_dvma; u32 tmp; int i; @@ -152,8 +153,8 @@ static int qe_init(struct sunqe *qep, int from_irq) return -EAGAIN; /* Setup initial rx/tx init block pointers. */ - sbus_writel(qep->qblock_dvma + qib_offset(qe_rxd, 0), cregs + CREG_RXDS); - sbus_writel(qep->qblock_dvma + qib_offset(qe_txd, 0), cregs + CREG_TXDS); + sbus_writel(qblk_dvma + qib_offset(qe_rxd, 0), cregs + CREG_RXDS); + sbus_writel(qblk_dvma + qib_offset(qe_txd, 0), cregs + CREG_TXDS); /* Enable/mask the various irq's. */ sbus_writel(0, cregs + CREG_RIMASK); @@ -413,7 +414,7 @@ static void qe_rx(struct sunqe *qep) struct net_device *dev = qep->dev; struct qe_rxd *this; struct sunqe_buffers *qbufs = qep->buffers; - __u32 qbufs_dvma = qep->buffers_dvma; + __u32 qbufs_dvma = (__u32)qep->buffers_dvma; int elem = qep->rx_new; u32 flags; @@ -572,7 +573,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sunqe *qep = netdev_priv(dev); struct sunqe_buffers *qbufs = qep->buffers; - __u32 txbuf_dvma, qbufs_dvma = qep->buffers_dvma; + __u32 txbuf_dvma, qbufs_dvma = (__u32)qep->buffers_dvma; unsigned char *txbuf; int len, entry; diff --git a/drivers/net/ethernet/sun/sunqe.h b/drivers/net/ethernet/sun/sunqe.h index 581781b6b2fa..ae190b77431b 100644 --- a/drivers/net/ethernet/sun/sunqe.h +++ b/drivers/net/ethernet/sun/sunqe.h @@ -334,12 +334,12 @@ struct sunqe { void __iomem *qcregs; /* QEC per-channel Registers */ void __iomem *mregs; /* Per-channel MACE Registers */ struct qe_init_block *qe_block; /* RX and TX descriptors */ - __u32 qblock_dvma; /* RX and TX descriptors */ + dma_addr_t qblock_dvma; /* RX and TX descriptors */ spinlock_t lock; /* Protects txfull state */ int rx_new, rx_old; /* RX ring extents */ int tx_new, tx_old; /* TX ring extents */ struct sunqe_buffers *buffers; /* CPU visible address. */ - __u32 buffers_dvma; /* DVMA visible address. */ + dma_addr_t buffers_dvma; /* DVMA visible address. */ struct sunqec *parent; u8 mconfig; /* Base MACE mconfig value */ struct platform_device *op; /* QE's OF device struct */ -- cgit v1.2.3-59-g8ed1b From 1a9bbccaf8182da368dae454b57dc1c55074d266 Mon Sep 17 00:00:00 2001 From: Tushar Dave Date: Mon, 17 Oct 2016 13:57:00 -0700 Subject: sunbmac: Fix compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sunbmac uses '__u32' for dma handle while invoking kernel DMA APIs, instead of using dma_addr_t. This hasn't caused any 'incompatible pointer type' warning on SPARC because until now dma_addr_t is of type u32. However, recent changes in SPARC ATU (iommu) enables 64bit DMA and therefore dma_addr_t becomes of type u64. This makes 'incompatible pointer type' warnings inevitable. e.g. drivers/net/ethernet/sun/sunbmac.c: In function ‘bigmac_ether_init’: drivers/net/ethernet/sun/sunbmac.c:1166: warning: passing argument 3 of ‘dma_alloc_coherent’ from incompatible pointer type ./include/linux/dma-mapping.h:445: note: expected ‘dma_addr_t *’ but argument is of type ‘__u32 *’ This patch resolves above compiler warning. Signed-off-by: Tushar Dave Reviewed-by: chris hyser Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sunbmac.c | 5 +++-- drivers/net/ethernet/sun/sunbmac.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index aa4f9d2d8fa9..02f452730d52 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -623,6 +623,7 @@ static int bigmac_init_hw(struct bigmac *bp, int from_irq) void __iomem *gregs = bp->gregs; void __iomem *cregs = bp->creg; void __iomem *bregs = bp->bregs; + __u32 bblk_dvma = (__u32)bp->bblock_dvma; unsigned char *e = &bp->dev->dev_addr[0]; /* Latch current counters into statistics. */ @@ -671,9 +672,9 @@ static int bigmac_init_hw(struct bigmac *bp, int from_irq) bregs + BMAC_XIFCFG); /* Tell the QEC where the ring descriptors are. */ - sbus_writel(bp->bblock_dvma + bib_offset(be_rxd, 0), + sbus_writel(bblk_dvma + bib_offset(be_rxd, 0), cregs + CREG_RXDS); - sbus_writel(bp->bblock_dvma + bib_offset(be_txd, 0), + sbus_writel(bblk_dvma + bib_offset(be_txd, 0), cregs + CREG_TXDS); /* Setup the FIFO pointers into QEC local memory. */ diff --git a/drivers/net/ethernet/sun/sunbmac.h b/drivers/net/ethernet/sun/sunbmac.h index 06dd21707353..532fc56830cf 100644 --- a/drivers/net/ethernet/sun/sunbmac.h +++ b/drivers/net/ethernet/sun/sunbmac.h @@ -291,7 +291,7 @@ struct bigmac { void __iomem *bregs; /* BigMAC Registers */ void __iomem *tregs; /* BigMAC Transceiver */ struct bmac_init_block *bmac_block; /* RX and TX descriptors */ - __u32 bblock_dvma; /* RX and TX descriptors */ + dma_addr_t bblock_dvma; /* RX and TX descriptors */ spinlock_t lock; -- cgit v1.2.3-59-g8ed1b From e6b5f1be7afe1657c40c08082c562b1a036a54c1 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 2 Nov 2016 09:36:32 -0700 Subject: config: Adding the new config parameter CONFIG_PROVE_LOCKING_SMALL for sparc This new config parameter limits the space used for "Lock debugging: prove locking correctness" by about 4MB. The current sparc systems have the limitation of 32MB size for kernel size including .text, .data and .bss sections. With PROVE_LOCKING feature, the kernel size could grow beyond this limit and causing system boot-up issues. With this option, kernel limits the size of the entries of lock_chains, stack_trace etc., so that kernel fits in required size limit. This is not visible to user and only used for sparc. Signed-off-by: Babu Moger Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 1 + lib/Kconfig.debug | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 60145c9b9f84..165ecdd24d22 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -43,6 +43,7 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select HAVE_ARCH_HARDENED_USERCOPY + select PROVE_LOCKING_SMALL if PROVE_LOCKING config SPARC32 def_bool !64BIT diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b01e547d4d04..a6c8db1d62f6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1085,6 +1085,9 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.txt. +config PROVE_LOCKING_SMALL + bool + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT -- cgit v1.2.3-59-g8ed1b From e245d99e6cc4a0b904b87b46b4f60d46fb405987 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Wed, 2 Nov 2016 09:36:33 -0700 Subject: lockdep: Limit static allocations if PROVE_LOCKING_SMALL is defined Reduce the size of data structure for lockdep entries by half if PROVE_LOCKING_SMALL if defined. This is used only for sparc. Signed-off-by: Babu Moger Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- kernel/locking/lockdep_internals.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index 51c4b24b6328..c2b88490d857 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -45,6 +45,14 @@ enum { #define LOCKF_USED_IN_IRQ_READ \ (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) +/* + * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text, + * .data and .bss to fit in required 32MB limit for the kernel. With + * PROVE_LOCKING we could go over this limit and cause system boot-up problems. + * So, reduce the static allocations for lockdeps related structures so that + * everything fits in current required size limit. + */ +#ifdef CONFIG_PROVE_LOCKING_SMALL /* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. @@ -54,18 +62,24 @@ enum { * table (if it's not there yet), and we check it for lock order * conflicts and deadlocks. */ +#define MAX_LOCKDEP_ENTRIES 16384UL +#define MAX_LOCKDEP_CHAINS_BITS 15 +#define MAX_STACK_TRACE_ENTRIES 262144UL +#else #define MAX_LOCKDEP_ENTRIES 32768UL #define MAX_LOCKDEP_CHAINS_BITS 16 -#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) - -#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) /* * Stack-trace: tightly packed array of stack backtrace * addresses. Protected by the hash_lock. */ #define MAX_STACK_TRACE_ENTRIES 524288UL +#endif + +#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS) + +#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5) extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -- cgit v1.2.3-59-g8ed1b From dbfa048db97c15ee3fff2ee17b19e61f3ab12d53 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Tue, 15 Nov 2016 11:12:05 +0100 Subject: MAINTAINERS: Add LED subsystem co-maintainer Mark me as a co-maintainer of LED subsystem. Signed-off-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 411e3b87b8c2..2433e471634f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7084,6 +7084,7 @@ F: drivers/scsi/53c700* LED SUBSYSTEM M: Richard Purdie M: Jacek Anaszewski +M: Pavel Machek L: linux-leds@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git S: Maintained -- cgit v1.2.3-59-g8ed1b From 178c7ae944444c198a1d9646477ab10d2d51f03e Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 19 Nov 2016 01:40:10 +0300 Subject: net: macb: add check for dma mapping error in start_xmit() at91ether_start_xmit() does not check for dma mapping errors. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index b32444a3ed79..533653bd7aec 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -2673,6 +2673,12 @@ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev) lp->skb_length = skb->len; lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(NULL, lp->skb_physaddr)) { + dev_kfree_skb_any(skb); + dev->stats.tx_dropped++; + netdev_err(dev, "%s: DMA mapping error\n", __func__); + return NETDEV_TX_OK; + } /* Set address of the data in the Transmit Address register */ macb_writel(lp, TAR, lp->skb_physaddr); -- cgit v1.2.3-59-g8ed1b From 9dd35d6882a10629b95f2bc41a541740ef24c226 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Nov 2016 22:21:17 +0800 Subject: sparc: drop duplicate header scatterlist.h Drop duplicate header scatterlist.h from iommu_common.h. Signed-off-by: Geliang Tang Signed-off-by: David S. Miller --- arch/sparc/kernel/iommu_common.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h index b40cec252905..828493329f68 100644 --- a/arch/sparc/kernel/iommu_common.h +++ b/arch/sparc/kernel/iommu_common.h @@ -13,7 +13,6 @@ #include #include #include -#include #include -- cgit v1.2.3-59-g8ed1b From 8b9534406456313beb7bf9051150b50c63049ab7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 16 Nov 2016 18:31:30 +0100 Subject: KVM: x86: do not go through vcpu in __get_kvmclock_ns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Going through the first VCPU is wrong if you follow a KVM_SET_CLOCK with a KVM_GET_CLOCK immediately after, without letting the VCPU run and call kvm_guest_time_update. To fix this, compute the kvmclock value ourselves, using the master clock (tsc, nsec) pair as the base and the host CPU frequency as the scale. Reported-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3017de0431bd..7d3d9d4d6124 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1724,18 +1724,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) static u64 __get_kvmclock_ns(struct kvm *kvm) { - struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0); struct kvm_arch *ka = &kvm->arch; - s64 ns; + struct pvclock_vcpu_time_info hv_clock; - if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) { - u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc()); - ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc); - } else { - ns = ktime_get_boot_ns() + ka->kvmclock_offset; + spin_lock(&ka->pvclock_gtod_sync_lock); + if (!ka->use_master_clock) { + spin_unlock(&ka->pvclock_gtod_sync_lock); + return ktime_get_boot_ns() + ka->kvmclock_offset; } - return ns; + hv_clock.tsc_timestamp = ka->master_cycle_now; + hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; + spin_unlock(&ka->pvclock_gtod_sync_lock); + + kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + &hv_clock.tsc_shift, + &hv_clock.tsc_to_system_mul); + return __pvclock_read_cycles(&hv_clock, rdtsc()); } u64 get_kvmclock_ns(struct kvm *kvm) -- cgit v1.2.3-59-g8ed1b From 1650b4ebc99da4c137bfbfc531be4a2405f951dd Mon Sep 17 00:00:00 2001 From: Ignacio Alvarado Date: Fri, 4 Nov 2016 12:15:55 -0700 Subject: KVM: Disable irq while unregistering user notifier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Function user_notifier_unregister should be called only once for each registered user notifier. Function kvm_arch_hardware_disable can be executed from an IPI context which could cause a race condition with a VCPU returning to user mode and attempting to unregister the notifier. Signed-off-by: Ignacio Alvarado Cc: stable@vger.kernel.org Fixes: 18863bdd60f8 ("KVM: x86 shared msr infrastructure") Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7d3d9d4d6124..2f27af4f312a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn) struct kvm_shared_msrs *locals = container_of(urn, struct kvm_shared_msrs, urn); struct kvm_shared_msr_values *values; + unsigned long flags; + /* + * Disabling irqs at this point since the following code could be + * interrupted and executed through kvm_arch_hardware_disable() + */ + local_irq_save(flags); + if (locals->registered) { + locals->registered = false; + user_return_notifier_unregister(urn); + } + local_irq_restore(flags); for (slot = 0; slot < shared_msrs_global.nr; ++slot) { values = &locals->values[slot]; if (values->host != values->curr) { @@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) values->curr = values->host; } } - locals->registered = false; - user_return_notifier_unregister(urn); } static void shared_msr_update(unsigned slot, u32 msr) -- cgit v1.2.3-59-g8ed1b From e3fd9a93a12a1020067a676e826877623cee8e2b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Nov 2016 17:48:15 +0100 Subject: kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace can read the exact value of kvmclock by reading the TSC and fetching the timekeeping parameters out of guest memory. This however is brittle and not necessary anymore with KVM 4.11. Provide a mechanism that lets userspace know if the new KVM_GET_CLOCK semantics are in effect, and---since we are at it---if the clock is stable across all VCPUs. Cc: Radim Krčmář Cc: Marcelo Tosatti Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 11 +++++++++++ arch/x86/kvm/x86.c | 10 +++++++--- include/uapi/linux/kvm.h | 7 +++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 739db9ab16b2..6bbceb9a3a19 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios such as migration. +When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the +set of bits that KVM can return in struct kvm_clock_data's flag member. + +The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned +value is the exact kvmclock value seen by all VCPUs at the instant +when KVM_GET_CLOCK was called. If clear, the returned value is simply +CLOCK_MONOTONIC plus a constant offset; the offset can be modified +with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock, +but the exact value read by each VCPU could differ, because the host +TSC is not stable. + struct kvm_clock_data { __u64 clock; /* kvmclock current value */ __u32 flags; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f27af4f312a..3320804bb2ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2610,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: case KVM_CAP_XEN_HVM: - case KVM_CAP_ADJUST_CLOCK: case KVM_CAP_VCPU_EVENTS: case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: @@ -2637,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif r = 1; break; + case KVM_CAP_ADJUST_CLOCK: + r = KVM_CLOCK_TSC_STABLE; + break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, @@ -4117,9 +4119,11 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; - now_ns = get_kvmclock_ns(kvm); + local_irq_disable(); + now_ns = __get_kvmclock_ns(kvm); user_ns.clock = now_ns; - user_ns.flags = 0; + user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0; + local_irq_enable(); memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 300ef255d1e0..4ee67cb99143 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -972,12 +972,19 @@ struct kvm_irqfd { __u8 pad[16]; }; +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ +#define KVM_CLOCK_TSC_STABLE 2 + struct kvm_clock_data { __u64 clock; __u32 flags; __u32 pad[9]; }; +/* For KVM_CAP_SW_TLB */ + #define KVM_MMU_FSL_BOOKE_NOHV 0 #define KVM_MMU_FSL_BOOKE_HV 1 -- cgit v1.2.3-59-g8ed1b From 22583f0d9c85e60c9860bc8a0ebff59fe08be6d7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:45 +0100 Subject: KVM: async_pf: avoid recursive flushing of work items MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was reported by syzkaller: [ INFO: possible recursive locking detected ] 4.9.0-rc4+ #49 Not tainted --------------------------------------------- kworker/2:1/5658 is trying to acquire lock: ([ 1644.769018] (&work->work) [< inline >] list_empty include/linux/compiler.h:243 [] flush_work+0x0/0x660 kernel/workqueue.c:1511 but task is already holding lock: ([ 1644.769018] (&work->work) [] process_one_work+0x94b/0x1900 kernel/workqueue.c:2093 stack backtrace: CPU: 2 PID: 5658 Comm: kworker/2:1 Not tainted 4.9.0-rc4+ #49 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: events async_pf_execute ffff8800676ff630 ffffffff81c2e46b ffffffff8485b930 ffff88006b1fc480 0000000000000000 ffffffff8485b930 ffff8800676ff7e0 ffffffff81339b27 ffff8800676ff7e8 0000000000000046 ffff88006b1fcce8 ffff88006b1fccf0 Call Trace: ... [] flush_work+0x93/0x660 kernel/workqueue.c:2846 [] __cancel_work_timer+0x17a/0x410 kernel/workqueue.c:2916 [] cancel_work_sync+0x17/0x20 kernel/workqueue.c:2951 [] kvm_clear_async_pf_completion_queue+0xd7/0x400 virt/kvm/async_pf.c:126 [< inline >] kvm_free_vcpus arch/x86/kvm/x86.c:7841 [] kvm_arch_destroy_vm+0x23d/0x620 arch/x86/kvm/x86.c:7946 [< inline >] kvm_destroy_vm virt/kvm/kvm_main.c:731 [] kvm_put_kvm+0x40e/0x790 virt/kvm/kvm_main.c:752 [] async_pf_execute+0x23d/0x4f0 virt/kvm/async_pf.c:111 [] process_one_work+0x9fc/0x1900 kernel/workqueue.c:2096 [] worker_thread+0xef/0x1480 kernel/workqueue.c:2230 [] kthread+0x244/0x2d0 kernel/kthread.c:209 [] ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:433 The reason is that kvm_put_kvm is causing the destruction of the VM, but the page fault is still on the ->queue list. The ->queue list is owned by the VCPU, not by the work items, so we cannot just add list_del to the work item. Instead, use work->vcpu to note async page faults that have been resolved and will be processed through the done list. There is no need to flush those. Cc: Dmitry Vyukov Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- virt/kvm/async_pf.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 8035cc1eb955..efeceb0a222d 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -91,6 +91,7 @@ static void async_pf_execute(struct work_struct *work) spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); + apf->vcpu = NULL; spin_unlock(&vcpu->async_pf.lock); /* @@ -113,6 +114,8 @@ static void async_pf_execute(struct work_struct *work) void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) { + spin_lock(&vcpu->async_pf.lock); + /* cancel outstanding work queue item */ while (!list_empty(&vcpu->async_pf.queue)) { struct kvm_async_pf *work = @@ -120,6 +123,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) typeof(*work), queue); list_del(&work->queue); + /* + * We know it's present in vcpu->async_pf.done, do + * nothing here. + */ + if (!work->vcpu) + continue; + + spin_unlock(&vcpu->async_pf.lock); #ifdef CONFIG_KVM_ASYNC_PF_SYNC flush_work(&work->work); #else @@ -129,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) kmem_cache_free(async_pf_cache, work); } #endif + spin_lock(&vcpu->async_pf.lock); } - spin_lock(&vcpu->async_pf.lock); while (!list_empty(&vcpu->async_pf.done)) { struct kvm_async_pf *work = list_first_entry(&vcpu->async_pf.done, -- cgit v1.2.3-59-g8ed1b From 7301d6abaea926d685832f7e1f0c37dd206b01f4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:46 +0100 Subject: KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: [ INFO: suspicious RCU usage. ] 4.9.0-rc4+ #47 Not tainted ------------------------------- ./include/linux/kvm_host.h:536 suspicious rcu_dereference_check() usage! stack backtrace: CPU: 1 PID: 6679 Comm: syz-executor Not tainted 4.9.0-rc4+ #47 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 ffff880039e2f6d0 ffffffff81c2e46b ffff88003e3a5b40 0000000000000000 0000000000000001 ffffffff83215600 ffff880039e2f700 ffffffff81334ea9 ffffc9000730b000 0000000000000004 ffff88003c4f8420 ffff88003d3f8000 Call Trace: [< inline >] __dump_stack lib/dump_stack.c:15 [] dump_stack+0xb3/0x118 lib/dump_stack.c:51 [] lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4445 [< inline >] __kvm_memslots include/linux/kvm_host.h:534 [< inline >] kvm_memslots include/linux/kvm_host.h:541 [] kvm_gfn_to_hva_cache_init+0xa1e/0xce0 virt/kvm/kvm_main.c:1941 [] kvm_lapic_set_vapic_addr+0xed/0x140 arch/x86/kvm/lapic.c:2217 Reported-by: Dmitry Vyukov Fixes: fda4e2e85589191b123d31cdc21fd33ee70f50fd Cc: Andrew Honig Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3320804bb2ac..04c5d96b1d67 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3431,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, }; case KVM_SET_VAPIC_ADDR: { struct kvm_vapic_addr va; + int idx; r = -EINVAL; if (!lapic_in_kernel(vcpu)) @@ -3438,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&va, argp, sizeof va)) goto out; + idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_X86_SETUP_MCE: { -- cgit v1.2.3-59-g8ed1b From a2b07739ff5ded8ca7e9c7ff0749ed6f0d36aee2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 17 Nov 2016 15:55:47 +0100 Subject: kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_arch_set_irq is unused since commit b97e6de9c96. Merge its functionality with kvm_arch_set_irq_inatomic. Reported-by: Jiang Biao Signed-off-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/irq_comm.c | 58 +++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 25810b144b58..4da03030d5a7 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -156,6 +156,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -163,18 +173,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq irq; int r; - if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) - return -EWOULDBLOCK; + switch (e->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(e, kvm, irq_source_id, level, + line_status); - if (kvm_msi_route_invalid(kvm, e)) - return -EINVAL; + case KVM_IRQ_ROUTING_MSI: + if (kvm_msi_route_invalid(kvm, e)) + return -EINVAL; - kvm_set_msi_irq(kvm, e, &irq); + kvm_set_msi_irq(kvm, e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) - return r; - else - return -EWOULDBLOCK; + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + return r; + break; + + default: + break; + } + + return -EWOULDBLOCK; } int kvm_request_irq_source_id(struct kvm *kvm) @@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, - bool line_status) -{ - if (!level) - return -1; - - return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); -} - int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) @@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status) -{ - switch (irq->type) { - case KVM_IRQ_ROUTING_HV_SINT: - return kvm_hv_set_sint(irq, kvm, irq_source_id, level, - line_status); - default: - return -EWOULDBLOCK; - } -} - void kvm_arch_irq_routing_update(struct kvm *kvm) { kvm_hv_irq_routing_update(kvm); -- cgit v1.2.3-59-g8ed1b From ad092de60f865c1ad94221bd06d381ecea446cc8 Mon Sep 17 00:00:00 2001 From: Alex Hemme Date: Sat, 19 Nov 2016 10:48:38 +0100 Subject: i2c: i2c-mux-pca954x: fix deselect enabling for device-tree Deselect functionality can be ignored for device-trees with "i2c-mux-idle-disconnect" entries if no platform_data is available. By enabling the deselect functionality outside the platform_data block the logic works as it did in previous kernels. Fixes: 7fcac9807175 ("i2c: i2c-mux-pca954x: convert to use an explicit i2c mux core") Cc: # v4.7+ Signed-off-by: Alex Hemme Signed-off-by: Ziyang Wu [touched up a few minor issues /peda] Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-pca954x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 1091346f2480..8bc3d36d2837 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -268,9 +268,9 @@ static int pca954x_probe(struct i2c_client *client, /* discard unconfigured channels */ break; idle_disconnect_pd = pdata->modes[num].deselect_on_exit; - data->deselect |= (idle_disconnect_pd - || idle_disconnect_dt) << num; } + data->deselect |= (idle_disconnect_pd || + idle_disconnect_dt) << num; ret = i2c_mux_add_adapter(muxc, force, num, class); -- cgit v1.2.3-59-g8ed1b From 3c7018ebf8dbf14e7cd4f5dc648c51fc979f45bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2016 20:35:52 -0500 Subject: fscrypto: don't use on-stack buffer for filename encryption With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. For short filenames, fname_encrypt() was encrypting a stack buffer holding the padded filename. Fix it by encrypting the filename in-place in the output buffer, thereby making the temporary buffer unnecessary. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/fname.c | 53 +++++++++++++++++++++-------------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 9a28133ac3b8..9b774f4b50c8 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -39,65 +39,54 @@ static void fname_crypt_complete(struct crypto_async_request *req, int res) static int fname_encrypt(struct inode *inode, const struct qstr *iname, struct fscrypt_str *oname) { - u32 ciphertext_len; struct skcipher_request *req = NULL; DECLARE_FS_COMPLETION_RESULT(ecr); struct fscrypt_info *ci = inode->i_crypt_info; struct crypto_skcipher *tfm = ci->ci_ctfm; int res = 0; char iv[FS_CRYPTO_BLOCK_SIZE]; - struct scatterlist src_sg, dst_sg; + struct scatterlist sg; int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK); - char *workbuf, buf[32], *alloc_buf = NULL; - unsigned lim; + unsigned int lim; + unsigned int cryptlen; lim = inode->i_sb->s_cop->max_namelen(inode); if (iname->len <= 0 || iname->len > lim) return -EIO; - ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE); - ciphertext_len = round_up(ciphertext_len, padding); - ciphertext_len = min(ciphertext_len, lim); + /* + * Copy the filename to the output buffer for encrypting in-place and + * pad it with the needed number of NUL bytes. + */ + cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE); + cryptlen = round_up(cryptlen, padding); + cryptlen = min(cryptlen, lim); + memcpy(oname->name, iname->name, iname->len); + memset(oname->name + iname->len, 0, cryptlen - iname->len); - if (ciphertext_len <= sizeof(buf)) { - workbuf = buf; - } else { - alloc_buf = kmalloc(ciphertext_len, GFP_NOFS); - if (!alloc_buf) - return -ENOMEM; - workbuf = alloc_buf; - } + /* Initialize the IV */ + memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - /* Allocate request */ + /* Set up the encryption request */ req = skcipher_request_alloc(tfm, GFP_NOFS); if (!req) { printk_ratelimited(KERN_ERR - "%s: crypto_request_alloc() failed\n", __func__); - kfree(alloc_buf); + "%s: skcipher_request_alloc() failed\n", __func__); return -ENOMEM; } skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, fname_crypt_complete, &ecr); + sg_init_one(&sg, oname->name, cryptlen); + skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv); - /* Copy the input */ - memcpy(workbuf, iname->name, iname->len); - if (iname->len < ciphertext_len) - memset(workbuf + iname->len, 0, ciphertext_len - iname->len); - - /* Initialize IV */ - memset(iv, 0, FS_CRYPTO_BLOCK_SIZE); - - /* Create encryption request */ - sg_init_one(&src_sg, workbuf, ciphertext_len); - sg_init_one(&dst_sg, oname->name, ciphertext_len); - skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv); + /* Do the encryption */ res = crypto_skcipher_encrypt(req); if (res == -EINPROGRESS || res == -EBUSY) { + /* Request is being completed asynchronously; wait for it */ wait_for_completion(&ecr.completion); res = ecr.res; } - kfree(alloc_buf); skcipher_request_free(req); if (res < 0) { printk_ratelimited(KERN_ERR @@ -105,7 +94,7 @@ static int fname_encrypt(struct inode *inode, return res; } - oname->len = ciphertext_len; + oname->len = cryptlen; return 0; } -- cgit v1.2.3-59-g8ed1b From 0f0909e242f73c1154272cf04f07fc9afe13e5b8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 13 Nov 2016 20:41:09 -0500 Subject: fscrypto: don't use on-stack buffer for key derivation With the new (in 4.9) option to use a virtually-mapped stack (CONFIG_VMAP_STACK), stack buffers cannot be used as input/output for the scatterlist crypto API because they may not be directly mappable to struct page. get_crypt_info() was using a stack buffer to hold the output from the encryption operation used to derive the per-file key. Fix it by using a heap buffer. This bug could most easily be observed in a CONFIG_DEBUG_SG kernel because this allowed the BUG in sg_set_buf() to be triggered. Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- fs/crypto/keyinfo.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c index 82f0285f5d08..67fb6d8876d0 100644 --- a/fs/crypto/keyinfo.c +++ b/fs/crypto/keyinfo.c @@ -185,7 +185,7 @@ int get_crypt_info(struct inode *inode) struct crypto_skcipher *ctfm; const char *cipher_str; int keysize; - u8 raw_key[FS_MAX_KEY_SIZE]; + u8 *raw_key = NULL; int res; res = fscrypt_initialize(); @@ -238,6 +238,15 @@ retry: if (res) goto out; + /* + * This cannot be a stack buffer because it is passed to the scatterlist + * crypto API as part of key derivation. + */ + res = -ENOMEM; + raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS); + if (!raw_key) + goto out; + if (fscrypt_dummy_context_enabled(inode)) { memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE); goto got_key; @@ -276,7 +285,8 @@ got_key: if (res) goto out; - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); + raw_key = NULL; if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) { put_crypt_info(crypt_info); goto retry; @@ -287,7 +297,7 @@ out: if (res == -ENOKEY) res = 0; put_crypt_info(crypt_info); - memzero_explicit(raw_key, sizeof(raw_key)); + kzfree(raw_key); return res; } -- cgit v1.2.3-59-g8ed1b From 8cdf3372fe8368f56315e66bea9f35053c418093 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 18 Nov 2016 13:00:24 -0500 Subject: ext4: sanity check the block and cluster size at mount time If the block size or cluster size is insane, reject the mount. This is important for security reasons (although we shouldn't be just depending on this check). Ref: http://www.securityfocus.com/archive/1/539661 Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1332506 Reported-by: Borislav Petkov Reported-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/ext4.h | 1 + fs/ext4/super.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 282a51b07c57..a8a750f59621 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -235,6 +235,7 @@ struct ext4_io_submit { #define EXT4_MAX_BLOCK_SIZE 65536 #define EXT4_MIN_BLOCK_LOG_SIZE 10 #define EXT4_MAX_BLOCK_LOG_SIZE 16 +#define EXT4_MAX_CLUSTER_LOG_SIZE 30 #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) #else diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 20da99da0a34..52b0530c5d65 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3565,7 +3565,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d", blocksize); + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); goto failed_mount; } @@ -3697,6 +3705,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "block size (%d)", clustersize, blocksize); goto failed_mount; } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto failed_mount; + } sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - le32_to_cpu(es->s_log_block_size); sbi->s_clusters_per_group = -- cgit v1.2.3-59-g8ed1b From 32c231164b762dddefa13af5a0101032c70b50ef Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 18 Nov 2016 22:13:00 +0100 Subject: l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{,6}_bind() Lock socket before checking the SOCK_ZAPPED flag in l2tp_ip6_bind(). Without lock, a concurrent call could modify the socket flags between the sock_flag(sk, SOCK_ZAPPED) test and the lock_sock() call. This way, a socket could be inserted twice in l2tp_ip6_bind_table. Releasing it would then leave a stale pointer there, generating use-after-free errors when walking through the list or modifying adjacent entries. BUG: KASAN: use-after-free in l2tp_ip6_close+0x22e/0x290 at addr ffff8800081b0ed8 Write of size 8 by task syz-executor/10987 CPU: 0 PID: 10987 Comm: syz-executor Not tainted 4.8.0+ #39 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.2-0-g33fbe13 by qemu-project.org 04/01/2014 ffff880031d97838 ffffffff829f835b ffff88001b5a1640 ffff8800081b0ec0 ffff8800081b15a0 ffff8800081b6d20 ffff880031d97860 ffffffff8174d3cc ffff880031d978f0 ffff8800081b0e80 ffff88001b5a1640 ffff880031d978e0 Call Trace: [] dump_stack+0xb3/0x118 lib/dump_stack.c:15 [] kasan_object_err+0x1c/0x70 mm/kasan/report.c:156 [< inline >] print_address_description mm/kasan/report.c:194 [] kasan_report_error+0x1f6/0x4d0 mm/kasan/report.c:283 [< inline >] kasan_report mm/kasan/report.c:303 [] __asan_report_store8_noabort+0x3e/0x40 mm/kasan/report.c:329 [< inline >] __write_once_size ./include/linux/compiler.h:249 [< inline >] __hlist_del ./include/linux/list.h:622 [< inline >] hlist_del_init ./include/linux/list.h:637 [] l2tp_ip6_close+0x22e/0x290 net/l2tp/l2tp_ip6.c:239 [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 [] sock_release+0x8d/0x1d0 net/socket.c:570 [] sock_close+0x16/0x20 net/socket.c:1017 [] __fput+0x28c/0x780 fs/file_table.c:208 [] ____fput+0x15/0x20 fs/file_table.c:244 [] task_work_run+0xf9/0x170 [] do_exit+0x85e/0x2a00 [] do_group_exit+0x108/0x330 [] get_signal+0x617/0x17a0 kernel/signal.c:2307 [] do_signal+0x7f/0x18f0 [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Object at ffff8800081b0ec0, in cache L2TP/IPv6 size: 1448 Allocated: PID = 10987 [ 1116.897025] [] save_stack_trace+0x16/0x20 [ 1116.897025] [] save_stack+0x46/0xd0 [ 1116.897025] [] kasan_kmalloc+0xad/0xe0 [ 1116.897025] [] kasan_slab_alloc+0x12/0x20 [ 1116.897025] [< inline >] slab_post_alloc_hook mm/slab.h:417 [ 1116.897025] [< inline >] slab_alloc_node mm/slub.c:2708 [ 1116.897025] [< inline >] slab_alloc mm/slub.c:2716 [ 1116.897025] [] kmem_cache_alloc+0xc8/0x2b0 mm/slub.c:2721 [ 1116.897025] [] sk_prot_alloc+0x69/0x2b0 net/core/sock.c:1326 [ 1116.897025] [] sk_alloc+0x38/0xae0 net/core/sock.c:1388 [ 1116.897025] [] inet6_create+0x2d7/0x1000 net/ipv6/af_inet6.c:182 [ 1116.897025] [] __sock_create+0x37b/0x640 net/socket.c:1153 [ 1116.897025] [< inline >] sock_create net/socket.c:1193 [ 1116.897025] [< inline >] SYSC_socket net/socket.c:1223 [ 1116.897025] [] SyS_socket+0xef/0x1b0 net/socket.c:1203 [ 1116.897025] [] entry_SYSCALL_64_fastpath+0x23/0xc6 Freed: PID = 10987 [ 1116.897025] [] save_stack_trace+0x16/0x20 [ 1116.897025] [] save_stack+0x46/0xd0 [ 1116.897025] [] kasan_slab_free+0x71/0xb0 [ 1116.897025] [< inline >] slab_free_hook mm/slub.c:1352 [ 1116.897025] [< inline >] slab_free_freelist_hook mm/slub.c:1374 [ 1116.897025] [< inline >] slab_free mm/slub.c:2951 [ 1116.897025] [] kmem_cache_free+0xc8/0x330 mm/slub.c:2973 [ 1116.897025] [< inline >] sk_prot_free net/core/sock.c:1369 [ 1116.897025] [] __sk_destruct+0x32b/0x4f0 net/core/sock.c:1444 [ 1116.897025] [] sk_destruct+0x44/0x80 net/core/sock.c:1452 [ 1116.897025] [] __sk_free+0x53/0x220 net/core/sock.c:1460 [ 1116.897025] [] sk_free+0x23/0x30 net/core/sock.c:1471 [ 1116.897025] [] sk_common_release+0x28c/0x3e0 ./include/net/sock.h:1589 [ 1116.897025] [] l2tp_ip6_close+0x1fe/0x290 net/l2tp/l2tp_ip6.c:243 [ 1116.897025] [] inet_release+0xed/0x1c0 net/ipv4/af_inet.c:415 [ 1116.897025] [] inet6_release+0x50/0x70 net/ipv6/af_inet6.c:422 [ 1116.897025] [] sock_release+0x8d/0x1d0 net/socket.c:570 [ 1116.897025] [] sock_close+0x16/0x20 net/socket.c:1017 [ 1116.897025] [] __fput+0x28c/0x780 fs/file_table.c:208 [ 1116.897025] [] ____fput+0x15/0x20 fs/file_table.c:244 [ 1116.897025] [] task_work_run+0xf9/0x170 [ 1116.897025] [] do_exit+0x85e/0x2a00 [ 1116.897025] [] do_group_exit+0x108/0x330 [ 1116.897025] [] get_signal+0x617/0x17a0 kernel/signal.c:2307 [ 1116.897025] [] do_signal+0x7f/0x18f0 [ 1116.897025] [] exit_to_usermode_loop+0xbf/0x150 arch/x86/entry/common.c:156 [ 1116.897025] [< inline >] prepare_exit_to_usermode arch/x86/entry/common.c:190 [ 1116.897025] [] syscall_return_slowpath+0x1a0/0x1e0 arch/x86/entry/common.c:259 [ 1116.897025] [] entry_SYSCALL_64_fastpath+0xc4/0xc6 Memory state around the buggy address: ffff8800081b0d80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8800081b0e00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8800081b0e80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8800081b0f00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8800081b0f80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== The same issue exists with l2tp_ip_bind() and l2tp_ip_bind_table. Fixes: c51ce49735c1 ("l2tp: fix oops in L2TP IP sockets for connect() AF_UNSPEC case") Reported-by: Baozeng Ding Reported-by: Andrey Konovalov Tested-by: Baozeng Ding Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip.c | 5 +++-- net/l2tp/l2tp_ip6.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index fce25afb652a..982f6c44ea01 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret; int chk_addr_ret; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr_len < sizeof(struct sockaddr_l2tpip)) return -EINVAL; if (addr->l2tp_family != AF_INET) @@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) read_unlock_bh(&l2tp_ip_lock); lock_sock(sk); + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip)) goto out; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ad3468c32b53..9978d01ba0ba 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -269,8 +269,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int addr_type; int err; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; if (addr->l2tp_family != AF_INET6) return -EINVAL; if (addr_len < sizeof(*addr)) @@ -296,6 +294,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) lock_sock(sk); err = -EINVAL; + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_unlock; + if (sk->sk_state != TCP_CLOSE) goto out_unlock; -- cgit v1.2.3-59-g8ed1b From 3f0ae05d6fea0ed5b19efdbc9c9f8e02685a3af3 Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Sat, 19 Nov 2016 23:28:32 +0800 Subject: rtnl: fix the loop index update error in rtnl_dump_ifinfo() If the link is filtered out, loop index should also be updated. If not, loop index will not be correct. Fixes: dc599f76c22b0 ("net: Add support for filtering link dump by master device and kind") Signed-off-by: Zhang Shengju Acked-by: David Ahern Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2b9d7d08ed4d..a99917b5de33 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1609,7 +1609,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) head = &net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (link_dump_filtered(dev, master_idx, kind_ops)) - continue; + goto cont; if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, -- cgit v1.2.3-59-g8ed1b From 51b9a31c42edcd089f5b229633477ab5128faf03 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Sat, 19 Nov 2016 14:47:07 -0500 Subject: tipc: eliminate obsolete socket locking policy description The comment block in socket.c describing the locking policy is obsolete, and does not reflect current reality. We remove it in this commit. Since the current locking policy is much simpler and follows a mainstream approach, we see no need to add a new description. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 48 +----------------------------------------------- 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9f5f3c3dab5..db32777ab591 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2015, Ericsson AB + * Copyright (c) 2001-2007, 2012-2016, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -129,54 +129,8 @@ static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; static struct proto tipc_proto; - static const struct rhashtable_params tsk_rht_params; -/* - * Revised TIPC socket locking policy: - * - * Most socket operations take the standard socket lock when they start - * and hold it until they finish (or until they need to sleep). Acquiring - * this lock grants the owner exclusive access to the fields of the socket - * data structures, with the exception of the backlog queue. A few socket - * operations can be done without taking the socket lock because they only - * read socket information that never changes during the life of the socket. - * - * Socket operations may acquire the lock for the associated TIPC port if they - * need to perform an operation on the port. If any routine needs to acquire - * both the socket lock and the port lock it must take the socket lock first - * to avoid the risk of deadlock. - * - * The dispatcher handling incoming messages cannot grab the socket lock in - * the standard fashion, since invoked it runs at the BH level and cannot block. - * Instead, it checks to see if the socket lock is currently owned by someone, - * and either handles the message itself or adds it to the socket's backlog - * queue; in the latter case the queued message is processed once the process - * owning the socket lock releases it. - * - * NOTE: Releasing the socket lock while an operation is sleeping overcomes - * the problem of a blocked socket operation preventing any other operations - * from occurring. However, applications must be careful if they have - * multiple threads trying to send (or receive) on the same socket, as these - * operations might interfere with each other. For example, doing a connect - * and a receive at the same time might allow the receive to consume the - * ACK message meant for the connect. While additional work could be done - * to try and overcome this, it doesn't seem to be worthwhile at the present. - * - * NOTE: Releasing the socket lock while an operation is sleeping also ensures - * that another operation that must be performed in a non-blocking manner is - * not delayed for very long because the lock has already been taken. - * - * NOTE: This code assumes that certain fields of a port/socket pair are - * constant over its lifetime; such fields can be examined without taking - * the socket lock and/or port lock, and do not need to be re-read even - * after resuming processing after waiting. These fields include: - * - socket type - * - pointer to socket sk structure (aka tipc_sock structure) - * - pointer to port structure - * - port reference - */ - static u32 tsk_own_node(struct tipc_sock *tsk) { return msg_prevnode(&tsk->phdr); -- cgit v1.2.3-59-g8ed1b From 9c763584b7c8911106bb77af7e648bef09af9d80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Nov 2016 13:52:19 -0800 Subject: Linux 4.9-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9bc877d073d7..0ede48ba5aaf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From 3d40658c977769ce2138f286cf131537bf68bdfe Mon Sep 17 00:00:00 2001 From: John Johansen Date: Wed, 31 Aug 2016 21:10:06 -0700 Subject: apparmor: fix change_hat not finding hat after policy replacement After a policy replacement, the task cred may be out of date and need to be updated. However change_hat is using the stale profiles from the out of date cred resulting in either: a stale profile being applied or, incorrect failure when searching for a hat profile as it has been migrated to the new parent profile. Fixes: 01e2b670aa898a39259bc85c78e3d74820f4d3b6 (failure to find hat) Fixes: 898127c34ec03291c86f4ff3856d79e9e18952bc (stale policy being applied) Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1000287 Cc: stable@vger.kernel.org Signed-off-by: John Johansen Signed-off-by: James Morris --- security/apparmor/domain.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index fc3036b34e51..a4d90aa1045a 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -621,8 +621,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest) /* released below */ cred = get_current_cred(); cxt = cred_cxt(cred); - profile = aa_cred_profile(cred); - previous_profile = cxt->previous; + profile = aa_get_newest_profile(aa_cred_profile(cred)); + previous_profile = aa_get_newest_profile(cxt->previous); if (unconfined(profile)) { info = "unconfined"; @@ -718,6 +718,8 @@ audit: out: aa_put_profile(hat); kfree(name); + aa_put_profile(profile); + aa_put_profile(previous_profile); put_cred(cred); return error; -- cgit v1.2.3-59-g8ed1b From 6bc5445c0180a0c7cc61a95d131c7eac66459692 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Sun, 20 Nov 2016 17:22:38 +0000 Subject: ethernet: stmmac: make DWMAC_STM32 depend on it's associated SoC There's not much point, except compile test, enabling the stmmac platform drivers unless the STM32 SoC is enabled. It's not useful without it. Signed-off-by: Peter Robinson Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 3818c5e06eba..4b78168a5f3c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -107,7 +107,7 @@ config DWMAC_STI config DWMAC_STM32 tristate "STM32 DWMAC support" default ARCH_STM32 - depends on OF && HAS_IOMEM + depends on OF && HAS_IOMEM && (ARCH_STM32 || COMPILE_TEST) select MFD_SYSCON ---help--- Support for ethernet controller on STM32 SOCs. -- cgit v1.2.3-59-g8ed1b From 7c6ae610a1f0a9d3cebf790e0245b4e0f76aa86e Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 21 Nov 2016 08:56:21 +0800 Subject: net: l2tp: Treat NET_XMIT_CN as success in l2tp_eth_dev_xmit The tc could return NET_XMIT_CN as one congestion notification, but it does not mean the packe is lost. Other modules like ipvlan, macvlan, and others treat NET_XMIT_CN as success too. So l2tp_eth_dev_xmit should add the NET_XMIT_CN check. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- net/l2tp/l2tp_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 965f7e344cef..3dc97b4f982b 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -97,7 +97,7 @@ static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int len = skb->len; int ret = l2tp_xmit_skb(session, skb, session->hdr_len); - if (likely(ret == NET_XMIT_SUCCESS)) { + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { atomic_long_add(len, &priv->tx_bytes); atomic_long_inc(&priv->tx_packets); } else { -- cgit v1.2.3-59-g8ed1b From 7082c5c3f2407c52022507ffaf644dbbab97a883 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Nov 2016 10:08:37 +0100 Subject: tcp: zero ca_priv area when switching cc algorithms We need to zero out the private data area when application switches connection to different algorithm (TCP_CONGESTION setsockopt). When congestion ops get assigned at connect time everything is already zeroed because sk_alloc uses GFP_ZERO flag. But in the setsockopt case this contains whatever previous cc placed there. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/tcp_cong.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 1294af4e0127..f9038d6b109e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -200,8 +200,10 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; - if (sk->sk_state != TCP_CLOSE) + if (sk->sk_state != TCP_CLOSE) { + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); tcp_init_congestion_control(sk); + } } /* Manage refcounts on socket close. */ -- cgit v1.2.3-59-g8ed1b