From 3182215dd0b2120fb942ed88430cfb7c12d583e0 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 30 Oct 2018 22:02:03 +1100 Subject: powerpc/powernv/npu: Remove NPU DMA ops The NPU IOMMU is setup to mirror the parent PCIe device IOMMU setup. Therefore it does not make sense to call dma operations such as dma_map_page(), etc. directly on these devices. The existing dma_ops simply print a warning if they are ever called, however this is unnecessary and the warnings are likely to go unnoticed. It is instead simpler to remove these operations and let the generic DMA code print warnings (eg. via a NULL pointer deref) in cases of buggy drivers attempting dma operations on NVLink devices. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 64 ++------------------------------ 1 file changed, 4 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 6f60e0931922..75b935252981 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -102,63 +102,6 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) } EXPORT_SYMBOL(pnv_pci_get_npu_dev); -#define NPU_DMA_OP_UNSUPPORTED() \ - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ - __func__) - -static void *dma_npu_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return NULL; -} - -static void dma_npu_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); -} - -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_dma_supported(struct device *dev, u64 mask) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static u64 dma_npu_get_required_mask(struct device *dev) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static const struct dma_map_ops dma_npu_ops = { - .map_page = dma_npu_map_page, - .map_sg = dma_npu_map_sg, - .alloc = dma_npu_alloc, - .free = dma_npu_free, - .dma_supported = dma_npu_dma_supported, - .get_required_mask = dma_npu_get_required_mask, -}; - /* * Returns the PE assoicated with the PCI device of the given * NPU. Returns the linked pci device if pci_dev != NULL. @@ -270,10 +213,11 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); /* - * We don't initialise npu_pe->tce32_table as we always use - * dma_npu_ops which are nops. + * NVLink devices use the same TCE table configuration as + * their parent device so drivers shouldn't be doing DMA + * operations directly on these devices. */ - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); + set_dma_ops(&npe->pdev->dev, NULL); } /* -- cgit v1.2.3-59-g8ed1b From e39f9dd8206ad66992ac0e6218ef1ba746f2cce9 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Tue, 23 Oct 2018 18:03:19 +0200 Subject: pinctrl: meson: fix pinconf bias disable If a bias is enabled on a pin of an Amlogic SoC, calling .pin_config_set() with PIN_CONFIG_BIAS_DISABLE will not disable the bias. Instead it will force a pull-down bias on the pin. Instead of the pull type register bank, the driver should access the pull enable register bank. Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") Signed-off-by: Jerome Brunet Acked-by: Neil Armstrong Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index f8b778a7d471..53d449076dee 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -192,7 +192,7 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin, dev_dbg(pc->dev, "pin %u: disable bias\n", pin); meson_calc_reg_and_bit(bank, pin, REG_PULL, ®, &bit); - ret = regmap_update_bits(pc->reg_pull, reg, + ret = regmap_update_bits(pc->reg_pullen, reg, BIT(bit), 0); if (ret) return ret; -- cgit v1.2.3-59-g8ed1b From 4bc51e1e350cd4707ce6e551a93eae26d40b9889 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 29 Oct 2018 16:13:37 +0100 Subject: pinctrl: meson: fix gxbb ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 468c234f9ed7 ("pinctrl: amlogic: Add support for Amlogic Meson GXBB SoC") Signed-off-by: Jerome Brunet Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c index 4ceb06f8a33c..4edeb4cae72a 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c @@ -830,7 +830,7 @@ static struct meson_bank meson_gxbb_periphs_banks[] = { static struct meson_bank meson_gxbb_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIOAO_13, 0, 13, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIOAO_13, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = { -- cgit v1.2.3-59-g8ed1b From ed3a2b74f3eb34c84c8377353f4730f05acdfd05 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 29 Oct 2018 16:13:38 +0100 Subject: pinctrl: meson: fix gxl ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 0f15f500ff2c ("pinctrl: meson: Add GXL pinctrl definitions") Signed-off-by: Jerome Brunet Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-gxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 7dae1d7bf6b0..158f618f1695 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -807,7 +807,7 @@ static struct meson_bank meson_gxl_periphs_banks[] = { static struct meson_bank meson_gxl_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIOAO_9, 0, 9, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIOAO_9, 0, 9, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson_gxl_periphs_pinctrl_data = { -- cgit v1.2.3-59-g8ed1b From e91b162d2868672d06010f34aa83d408db13d3c6 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 29 Oct 2018 16:13:39 +0100 Subject: pinctrl: meson: fix meson8 ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs") Signed-off-by: Jerome Brunet Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8.c b/drivers/pinctrl/meson/pinctrl-meson8.c index c6d79315218f..86466173114d 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8.c +++ b/drivers/pinctrl/meson/pinctrl-meson8.c @@ -1053,7 +1053,7 @@ static struct meson_bank meson8_cbus_banks[] = { static struct meson_bank meson8_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson8_cbus_pinctrl_data = { -- cgit v1.2.3-59-g8ed1b From a1705f02704cd8a24d434bfd0141ee8142ad277a Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 29 Oct 2018 16:13:40 +0100 Subject: pinctrl: meson: fix meson8b ao pull register bits AO pull register definition is inverted between pull (up/down) and pull enable. Fixing this allows to properly apply bias setting through pinconf Fixes: 0fefcb6876d0 ("pinctrl: Add support for Meson8b") Signed-off-by: Jerome Brunet Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson8b.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c index bb2a30964fc6..647ad15d5c3c 100644 --- a/drivers/pinctrl/meson/pinctrl-meson8b.c +++ b/drivers/pinctrl/meson/pinctrl-meson8b.c @@ -906,7 +906,7 @@ static struct meson_bank meson8b_cbus_banks[] = { static struct meson_bank meson8b_aobus_banks[] = { /* name first lastc irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 0, 0, 16, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pinctrl_data meson8b_cbus_pinctrl_data = { -- cgit v1.2.3-59-g8ed1b From 5d7a5bcb67c70cbc904057ef52d3fcfeb24420bb Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Tue, 30 Oct 2018 15:10:40 -0500 Subject: sunrpc: correct the computation for page_ptr when truncating When truncating the encode buffer, the page_ptr is getting advanced, causing the next page to be skipped while encoding. The page is still included in the response, so the response contains a page of bogus data. We need to adjust the page_ptr backwards to ensure we encode the next page into the correct place. We saw this triggered when concurrent directory modifications caused nfsd4_encode_direct_fattr() to return nfserr_noent, and the resulting call to xdr_truncate_encode() corrupted the READDIR reply. Signed-off-by: Frank Sorenson Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/xdr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2bbb8d38d2bf..5cfb9e0a18dc 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -673,11 +673,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len) WARN_ON_ONCE(xdr->iov); return; } - if (fraglen) { + if (fraglen) xdr->end = head->iov_base + head->iov_len; - xdr->page_ptr--; - } /* (otherwise assume xdr->end is already set) */ + xdr->page_ptr--; head->iov_len = len; buf->len = len; xdr->p = head->iov_base + head->iov_len; -- cgit v1.2.3-59-g8ed1b From af31b04b67f4fd7f639fd465a507c154c46fc9fb Mon Sep 17 00:00:00 2001 From: Masayoshi Mizuma Date: Tue, 30 Oct 2018 21:50:25 -0400 Subject: tools/testing/nvdimm: Fix the array size for dimm devices. KASAN reports following global out of bounds access while nfit_test is being loaded. The out of bound access happens the following reference to dimm_fail_cmd_flags[dimm]. 'dimm' is over than the index value, NUM_DCR (==5). static int override_return_code(int dimm, unsigned int func, int rc) { if ((1 << func) & dimm_fail_cmd_flags[dimm]) { dimm_fail_cmd_flags[] definition: static unsigned long dimm_fail_cmd_flags[NUM_DCR]; 'dimm' is the return value of get_dimm(), and get_dimm() returns the index of handle[] array. The handle[] has 7 index. Let's use ARRAY_SIZE(handle) as the array size. KASAN report: ================================================================== BUG: KASAN: global-out-of-bounds in nfit_test_ctl+0x47bb/0x55b0 [nfit_test] Read of size 8 at addr ffffffffc10cbbe8 by task kworker/u41:0/8 ... Call Trace: dump_stack+0xea/0x1b0 ? dump_stack_print_info.cold.0+0x1b/0x1b ? kmsg_dump_rewind_nolock+0xd9/0xd9 print_address_description+0x65/0x22e ? nfit_test_ctl+0x47bb/0x55b0 [nfit_test] kasan_report.cold.6+0x92/0x1a6 nfit_test_ctl+0x47bb/0x55b0 [nfit_test] ... The buggy address belongs to the variable: dimm_fail_cmd_flags+0x28/0xffffffffffffa440 [nfit_test] ================================================================== Fixes: 39611e83a28c ("tools/testing/nvdimm: Make DSM failure code injection...") Signed-off-by: Masayoshi Mizuma Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9527d47a1070..01ec04bf91b5 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -140,8 +140,8 @@ static u32 handle[] = { [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), }; -static unsigned long dimm_fail_cmd_flags[NUM_DCR]; -static int dimm_fail_cmd_code[NUM_DCR]; +static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)]; +static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; static const struct nd_intel_smart smart_def = { .flags = ND_INTEL_SMART_HEALTH_VALID @@ -205,7 +205,7 @@ struct nfit_test { unsigned long deadline; spinlock_t lock; } ars_state; - struct device *dimm_dev[NUM_DCR]; + struct device *dimm_dev[ARRAY_SIZE(handle)]; struct nd_intel_smart *smart; struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; @@ -2680,7 +2680,7 @@ static int nfit_test_probe(struct platform_device *pdev) u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle; int i; - for (i = 0; i < NUM_DCR; i++) + for (i = 0; i < ARRAY_SIZE(handle); i++) if (nfit_handle == handle[i]) dev_set_drvdata(nfit_test->dimm_dev[i], nfit_mem); -- cgit v1.2.3-59-g8ed1b From 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 4 Nov 2018 01:46:00 -0700 Subject: xtensa: make sure bFLT stack is 16 byte aligned Xtensa ABI requires stack alignment to be at least 16. In noMMU configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at least 16. This fixes the following runtime error in noMMU configuration, caused by interaction between insufficiently aligned stack and alloca function, that results in corruption of on-stack variable in the libc function glob: Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65) - should not happen EXCCAUSE is 15 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index e4ccb88b7996..677bc76c1d70 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -23,7 +23,11 @@ # error Linux requires the Xtensa Windowed Registers Option. #endif -#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH +/* Xtensa ABI requires stack alignment to be at least 16 */ + +#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) + +#define ARCH_SLAB_MINALIGN STACK_ALIGN /* * User space process size: 1 GB. -- cgit v1.2.3-59-g8ed1b From 5841734fa6f997e57e8c29cbb6409bd74a6949e8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Nov 2018 16:44:59 -0800 Subject: scsi: target/core: Avoid that a kernel oops is triggered when COMPARE AND WRITE fails Fixes: aa73237dcb2d ("scsi: target/core: Always call transport_complete_callback() upon failure") Reviewed-by: David Disseldorp Cc: Nicholas Bellinger Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e31e4fc31aa1..2cfd61d62e97 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1778,7 +1778,7 @@ EXPORT_SYMBOL(target_submit_tmr); void transport_generic_request_failure(struct se_cmd *cmd, sense_reason_t sense_reason) { - int ret = 0; + int ret = 0, post_ret; pr_debug("-----[ Storage Engine Exception; sense_reason %d\n", sense_reason); @@ -1790,7 +1790,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, transport_complete_task_attr(cmd); if (cmd->transport_complete_callback) - cmd->transport_complete_callback(cmd, false, NULL); + cmd->transport_complete_callback(cmd, false, &post_ret); if (transport_check_aborted_status(cmd, 1)) return; -- cgit v1.2.3-59-g8ed1b From f8f4adc1c1661686f492cf27817844a3d0517aff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Nov 2018 16:34:49 +0100 Subject: scsi: myrb: fix sprintf buffer overflow warning gcc warns that the 12 byte fw_version field might not be long enough to contain the generated firmware name string: drivers/scsi/myrb.c: In function 'myrb_get_hba_config': drivers/scsi/myrb.c:1052:38: error: '%02d' directive writing between 2 and 3 bytes into a region of size between 2 and 5 [-Werror=format-overflow=] sprintf(cb->fw_version, "%d.%02d-%c-%02d", ^~~~ drivers/scsi/myrb.c:1052:26: note: directive argument in the range [0, 255] sprintf(cb->fw_version, "%d.%02d-%c-%02d", ^~~~~~~~~~~~~~~~~ drivers/scsi/myrb.c:1052:2: note: 'sprintf' output between 10 and 14 bytes into a destination of size 12 sprintf(cb->fw_version, "%d.%02d-%c-%02d", ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.major_version, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.minor_version, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.firmware_type, ~~~~~~~~~~~~~~~~~~~~~~~~~~~ enquiry2->fw.turn_id); ~~~~~~~~~~~~~~~~~~~~~ I have not checked whether there are appropriate range checks before the sprintf, but there is a range check after it that will bail out in case of out of range version numbers. This means we can simply use snprintf() instead of sprintf() to limit the output buffer size, and it will work correctly. Fixes: 081ff398c56c ("scsi: myrb: Add Mylex RAID controller (block interface)") Signed-off-by: Arnd Bergmann Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/myrb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index aeb282f617c5..0642f2d0a3bb 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1049,7 +1049,8 @@ static int myrb_get_hba_config(struct myrb_hba *cb) enquiry2->fw.firmware_type = '0'; enquiry2->fw.turn_id = 0; } - sprintf(cb->fw_version, "%d.%02d-%c-%02d", + snprintf(cb->fw_version, sizeof(cb->fw_version), + "%d.%02d-%c-%02d", enquiry2->fw.major_version, enquiry2->fw.minor_version, enquiry2->fw.firmware_type, -- cgit v1.2.3-59-g8ed1b From f8d294324598ec85bea2779512e48c94cbe4d7c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Nov 2018 16:35:48 +0100 Subject: scsi: lpfc: fix remoteport access The addition of a spinlock in lpfc_debugfs_nodelist_data() introduced a bug that lets us not skip NULL pointers correctly, as noticed by gcc-8: drivers/scsi/lpfc/lpfc_debugfs.c: In function 'lpfc_debugfs_nodelist_data.constprop': drivers/scsi/lpfc/lpfc_debugfs.c:728:13: error: 'nrport' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (nrport->port_role & FC_PORT_ROLE_NVME_INITIATOR) This changes the logic back to what it was, while keeping the added spinlock. Fixes: 9e210178267b ("scsi: lpfc: Synchronize access to remoteport via rport") Signed-off-by: Arnd Bergmann Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 0c8005bb0f53..34d311a7dbef 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -698,6 +698,8 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) rport = lpfc_ndlp_get_nrport(ndlp); if (rport) nrport = rport->remoteport; + else + nrport = NULL; spin_unlock(&phba->hbalock); if (!nrport) continue; -- cgit v1.2.3-59-g8ed1b From 77409c4cdc44560e1b3b839e62d7f73478199680 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Nov 2018 16:44:13 +0100 Subject: scsi: myrs: avoid stack overflow warning Putting a 1024 byte data structure on the stack is generally a bad idea. On 32-bit systems, it also triggers a compile-time warning when building with -Og: drivers/scsi/myrs.c: In function 'myrs_get_ctlr_info': drivers/scsi/myrs.c:212:1: error: the frame size of 1028 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] We only really need three members of the structure, so just read them manually here instead of copying the entire structure. Fixes: 77266186397c ("scsi: myrs: Add Mylex RAID controller (SCSI interface)") Signed-off-by: Arnd Bergmann Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/myrs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 0264a2e2bc19..b8d54ef8cf6d 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -163,9 +163,12 @@ static unsigned char myrs_get_ctlr_info(struct myrs_hba *cs) dma_addr_t ctlr_info_addr; union myrs_sgl *sgl; unsigned char status; - struct myrs_ctlr_info old; + unsigned short ldev_present, ldev_critical, ldev_offline; + + ldev_present = cs->ctlr_info->ldev_present; + ldev_critical = cs->ctlr_info->ldev_critical; + ldev_offline = cs->ctlr_info->ldev_offline; - memcpy(&old, cs->ctlr_info, sizeof(struct myrs_ctlr_info)); ctlr_info_addr = dma_map_single(&cs->pdev->dev, cs->ctlr_info, sizeof(struct myrs_ctlr_info), DMA_FROM_DEVICE); @@ -198,9 +201,9 @@ static unsigned char myrs_get_ctlr_info(struct myrs_hba *cs) cs->ctlr_info->rbld_active + cs->ctlr_info->exp_active != 0) cs->needs_update = true; - if (cs->ctlr_info->ldev_present != old.ldev_present || - cs->ctlr_info->ldev_critical != old.ldev_critical || - cs->ctlr_info->ldev_offline != old.ldev_offline) + if (cs->ctlr_info->ldev_present != ldev_present || + cs->ctlr_info->ldev_critical != ldev_critical || + cs->ctlr_info->ldev_offline != ldev_offline) shost_printk(KERN_INFO, cs->host, "Logical drive count changes (%d/%d/%d)\n", cs->ctlr_info->ldev_critical, -- cgit v1.2.3-59-g8ed1b From a3ecf48248a393438e77e569a0047e968e0ec6c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Nov 2018 16:47:23 +0100 Subject: scsi: myrs: only build on little-endian platforms Reading throught the new driver, I noticed that this cannot work on big-endian CPUs, and the old DAC960 had exactly the same behavior. To document this for the future, add a Kconfig dependency that prevents it from being included in big-endian kernels. Since the hardware is really old and we never had a working driver on it for big-endian platforms, it's unlikely to make a difference to users. Signed-off-by: Arnd Bergmann Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index f07444d30b21..640cd1b31a18 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -578,6 +578,7 @@ config SCSI_MYRB config SCSI_MYRS tristate "Mylex DAC960/DAC1100 PCI RAID Controller (SCSI Interface)" depends on PCI + depends on !CPU_BIG_ENDIAN || COMPILE_TEST select RAID_ATTRS help This driver adds support for the Mylex DAC960, AcceleRAID, and -- cgit v1.2.3-59-g8ed1b From e34ff8edcae89922d187425ab0b82e6a039aa371 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 26 Oct 2018 15:07:31 +0800 Subject: scsi: hisi_sas: Remove set but not used variable 'dq_list' Fixes gcc '-Wunused-but-set-variable' warning: drivers/scsi/hisi_sas/hisi_sas_v1_hw.c: In function 'start_delivery_v1_hw': drivers/scsi/hisi_sas/hisi_sas_v1_hw.c:907:20: warning: variable 'dq_list' set but not used [-Wunused-but-set-variable] drivers/scsi/hisi_sas/hisi_sas_v2_hw.c: In function 'start_delivery_v2_hw': drivers/scsi/hisi_sas/hisi_sas_v2_hw.c:1671:20: warning: variable 'dq_list' set but not used [-Wunused-but-set-variable] drivers/scsi/hisi_sas/hisi_sas_v3_hw.c: In function 'start_delivery_v3_hw': drivers/scsi/hisi_sas/hisi_sas_v3_hw.c:889:20: warning: variable 'dq_list' set but not used [-Wunused-but-set-variable] It never used since introduction in commit fa222db0b036 ("scsi: hisi_sas: Don't lock DQ for complete task sending") Signed-off-by: YueHaibing Acked-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 2 -- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 2 -- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 -- 3 files changed, 6 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index f0e457e6884e..8df822a4a1bd 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -904,11 +904,9 @@ static void start_delivery_v1_hw(struct hisi_sas_dq *dq) { struct hisi_hba *hisi_hba = dq->hisi_hba; struct hisi_sas_slot *s, *s1, *s2 = NULL; - struct list_head *dq_list; int dlvry_queue = dq->id; int wp; - dq_list = &dq->list; list_for_each_entry_safe(s, s1, &dq->list, delivery) { if (!s->ready) break; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index cc36b6473e98..77a85ead483e 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -1670,11 +1670,9 @@ static void start_delivery_v2_hw(struct hisi_sas_dq *dq) { struct hisi_hba *hisi_hba = dq->hisi_hba; struct hisi_sas_slot *s, *s1, *s2 = NULL; - struct list_head *dq_list; int dlvry_queue = dq->id; int wp; - dq_list = &dq->list; list_for_each_entry_safe(s, s1, &dq->list, delivery) { if (!s->ready) break; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index bd4ce38b98d2..a369450a1fa7 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -886,11 +886,9 @@ static void start_delivery_v3_hw(struct hisi_sas_dq *dq) { struct hisi_hba *hisi_hba = dq->hisi_hba; struct hisi_sas_slot *s, *s1, *s2 = NULL; - struct list_head *dq_list; int dlvry_queue = dq->id; int wp; - dq_list = &dq->list; list_for_each_entry_safe(s, s1, &dq->list, delivery) { if (!s->ready) break; -- cgit v1.2.3-59-g8ed1b From 0d52e642c0ccd7a877a58b6ec23552eb35e7170c Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 28 Oct 2018 14:05:48 +0900 Subject: scsi: qla2xxx: Fix a typo in MODULE_PARM_DESC This patch fixes a spelling typo in MODULE_PARM_DESC of ql2xplogiabsentdevice. Signed-off-by: Masanari Iida Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 518f15141170..20c85eed1a75 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -67,7 +67,7 @@ module_param(ql2xplogiabsentdevice, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(ql2xplogiabsentdevice, "Option to enable PLOGI to devices that are not present after " "a Fabric scan. This is needed for several broken switches. " - "Default is 0 - no PLOGI. 1 - perfom PLOGI."); + "Default is 0 - no PLOGI. 1 - perform PLOGI."); int ql2xloginretrycount = 0; module_param(ql2xloginretrycount, int, S_IRUGO); -- cgit v1.2.3-59-g8ed1b From 96edebd6bb995f2acb7694bed6e01bf6f5a7b634 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Wed, 24 Oct 2018 18:45:33 +1100 Subject: scsi: NCR5380: Return false instead of NULL I overlooked this statement when I recently converted the function result type from struct scsi_cmnd * to bool. No change to object code. Signed-off-by: Finn Thain Signed-off-by: Martin K. Petersen --- drivers/scsi/NCR5380.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 8429c855701f..01c23d27f290 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -1198,7 +1198,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) out: if (!hostdata->selecting) - return NULL; + return false; hostdata->selecting = NULL; return ret; } -- cgit v1.2.3-59-g8ed1b From 0ae790683fc28bb718d74f87cdf753c6445fe28d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 19:23:28 +1100 Subject: powerpc/mm/64s: Consolidate SLB assertions The code for assert_slb_exists() and assert_slb_notexists() is almost identical, except for the polarity of the WARN_ON(). In a future patch we'll need to modify this code, so consolidate it now into a single function. Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index c3fdf2969d9f..f3e002ee457b 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -58,7 +58,7 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } -static void assert_slb_exists(unsigned long ea) +static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM unsigned long tmp; @@ -66,19 +66,8 @@ static void assert_slb_exists(unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp == 0); -#endif -} -static void assert_slb_notexists(unsigned long ea) -{ -#ifdef CONFIG_DEBUG_VM - unsigned long tmp; - - WARN_ON_ONCE(mfmsr() & MSR_EE); - - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp != 0); + WARN_ON(present == (tmp == 0)); #endif } @@ -114,7 +103,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, */ slb_shadow_update(ea, ssize, flags, index); - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, ssize, flags)), "r" (mk_esid_data(ea, ssize, index)) @@ -137,7 +126,7 @@ void __slb_restore_bolted_realmode(void) "r" (be64_to_cpu(p->save_area[index].esid))); } - assert_slb_exists(local_paca->kstack); + assert_slb_presence(true, local_paca->kstack); } /* @@ -185,7 +174,7 @@ void slb_flush_and_restore_bolted(void) :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) : "memory"); - assert_slb_exists(get_paca()->kstack); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -443,9 +432,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) ea = (unsigned long) get_paca()->slb_cache[i] << SID_SHIFT; /* - * Could assert_slb_exists here, but hypervisor - * or machine check could have come in and - * removed the entry at this point. + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. */ slbie_data = ea; @@ -676,7 +665,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * User preloads should add isync afterwards in case the kernel * accesses user memory before it returns to userspace with rfid. */ - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); -- cgit v1.2.3-59-g8ed1b From 08e6a3434e2125e4b21d0d3f84678d427345bc0d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 19:25:18 +1100 Subject: powerpc/mm/64s: Use PPC_SLBFEE macro Old toolchains don't know about slbfee and break the build, eg: {standard input}:37: Error: Unrecognized opcode: `slbfee.' Fix it by using the macro version. We need to add an underscore version that takes raw register numbers from the inline asm, rather than our Rx macros. Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/mm/slb.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 6093bc8f74e5..a6e9e314c707 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -493,6 +493,8 @@ __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ __PPC_RT(t) | __PPC_RB(b)) +#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ + ___PPC_RT(t) | ___PPC_RB(b)) #define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \ __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) /* PASemi instructions */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index f3e002ee457b..457fd29448b1 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ static void assert_slb_presence(bool present, unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); #endif -- cgit v1.2.3-59-g8ed1b From 9586d569a369dc585a3e191dcabd72748e3c9c5c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 19:25:38 +1100 Subject: powerpc/mm/64s: Only use slbfee on CPUs that support it The slbfee instruction was only added in ISA 2.05 (Power6), it's not supported on older CPUs. We don't have a CPU feature for that ISA version though, so just use the ISA 2.06 feature flag. Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests") Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 457fd29448b1..b663a36f9ada 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -66,6 +66,9 @@ static void assert_slb_presence(bool present, unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return; + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); -- cgit v1.2.3-59-g8ed1b From 86d4d068df573a8c2105554624796c086d6bec3d Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 6 Nov 2018 12:00:01 +0100 Subject: parisc: Revert "Release spinlocks using ordered store" This reverts commit d27dfa13b9f77ae7e6ed09d70a0426ed26c1a8f9. Unfortunately, this patch needs to be reverted. We need the full sync barrier and not the limited barrier provided by using an ordered store. The sync ensures that all accesses and cache purge instructions that follow the sync are performed after all such instructions prior the sync instruction have completed executing. The patch breaks the rwlock implementation in glibc. This caused the test-lock application in the libprelude testsuite to hang. With the change reverted, the test runs correctly and the libprelude package builds successfully. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/spinlock.h | 4 ++-- arch/parisc/kernel/syscall.S | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 16aec9ba2580..8a63515f03bf 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,8 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); - /* Release with ordered store. */ - __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); + mb(); + *a = 1; } static inline int arch_spin_trylock(arch_spinlock_t *x) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 9505c317818d..a9bc90dc4ae7 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -640,7 +640,8 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -654,7 +655,8 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) #endif @@ -855,7 +857,8 @@ cas2_action: cas2_end: /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ @@ -865,7 +868,8 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 b lws_exit -- cgit v1.2.3-59-g8ed1b From aca49ee041cbdd329c55d4dbcd6b3d4b9af240e4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 5 Nov 2018 22:49:47 -0500 Subject: Revert "scsi: ufs: Disable blk-mq for now" This reverts commit d87161bea405e3260377026ca8a704a3f68bd67a. The issues that forced us to disable blk_mq for ufs have been resolved. Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 23d7cca36ff0..27db55b0ca7f 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8099,13 +8099,6 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } - - /* - * Do not use blk-mq at this time because blk-mq does not support - * runtime pm. - */ - host->use_blk_mq = false; - hba = shost_priv(host); hba->host = host; hba->dev = dev; -- cgit v1.2.3-59-g8ed1b From f635e48e866ee1a47d2d42ce012fdcc07bf55853 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Tue, 6 Nov 2018 00:51:21 -0800 Subject: scsi: qla2xxx: Initialize port speed to avoid setting lower speed This patch initializes port speed so that firmware does not set lower operating speed. Setting lower speed in firmware impacts WRITE perfomance. Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") Cc: Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Tested-by: Laurence Oberman Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 6fe20c27acc1..eb59c796a795 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4763,6 +4763,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->loop_id = FC_NO_LOOP_ID; qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; + fcport->fp_speed = PORT_SPEED_UNKNOWN; fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev, sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma, -- cgit v1.2.3-59-g8ed1b From 28c5bcf74fa07c25d5bd118d1271920f51ce2a98 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 6 Nov 2018 19:49:34 -0600 Subject: KVM: PPC: Move and undef TRACE_INCLUDE_PATH/FILE TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are used by , so like that #include, they should be outside #ifdef protection. They also need to be #undefed before defining, in case multiple trace headers are included by the same C file. This became the case on book3e after commit cf4a6085151a ("powerpc/mm: Add missing tracepoint for tlbie"), leading to the following build error: CC arch/powerpc/kvm/powerpc.o In file included from arch/powerpc/kvm/powerpc.c:51:0: arch/powerpc/kvm/trace.h:9:0: error: "TRACE_INCLUDE_PATH" redefined [-Werror] #define TRACE_INCLUDE_PATH . ^ In file included from arch/powerpc/kvm/../mm/mmu_decl.h:25:0, from arch/powerpc/kvm/powerpc.c:48: ./arch/powerpc/include/asm/trace.h:224:0: note: this is the location of the previous definition #define TRACE_INCLUDE_PATH asm ^ cc1: all warnings being treated as errors Reported-by: Christian Zigotzky Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/trace.h | 8 ++++++-- arch/powerpc/kvm/trace_booke.h | 9 +++++++-- arch/powerpc/kvm/trace_hv.h | 9 +++++++-- arch/powerpc/kvm/trace_pr.h | 9 +++++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 491b0f715d6b..ea1d7c808319 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + #include diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index ac640e81fdc5..3837842986aa 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_booke -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_booke #define kvm_trace_symbol_exit \ {0, "CRITICAL"}, \ @@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio, #endif /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + #include diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index bcfe8a987f6a..8a1e3b0047f1 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -9,8 +9,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_hv -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_hv #define kvm_trace_symbol_hcall \ {H_REMOVE, "H_REMOVE"}, \ @@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, #endif /* _TRACE_KVM_HV_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + #include diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 2f9a8829552b..46a46d328fbf 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -8,8 +8,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_pr TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), @@ -257,4 +255,11 @@ TRACE_EVENT(kvm_exit, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + #include -- cgit v1.2.3-59-g8ed1b From 8d72ee3266f08fd3490011ee7b95ffc31e66fd6d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 29 Oct 2018 12:47:40 +0530 Subject: Documentation: cpu-freq: Frequencies aren't always sorted The order in which the frequencies are displayed in cpufreq stats depends on the order in which the frequencies were sorted in the frequency table provided to cpufreq core by the cpufreq driver. They can be completely unsorted as well. The documentation's claim that the stats will be sorted in descending order is hence incorrect and here is an attempt to fix it. Reported-by: Pavel Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpufreq-stats.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt index a873855c811d..14378cecb172 100644 --- a/Documentation/cpu-freq/cpufreq-stats.txt +++ b/Documentation/cpu-freq/cpufreq-stats.txt @@ -86,9 +86,11 @@ transitions. This will give a fine grained information about all the CPU frequency transitions. The cat output here is a two dimensional matrix, where an entry (row i, column j) represents the count of number of transitions from -Freq_i to Freq_j. Freq_i is in descending order with increasing rows and -Freq_j is in descending order with increasing columns. The output here also -contains the actual freq values for each row and column for better readability. +Freq_i to Freq_j. Freq_i rows and Freq_j columns follow the sorting order in +which the driver has provided the frequency table initially to the cpufreq core +and so can be sorted (ascending or descending) or unsorted. The output here +also contains the actual freq values for each row and column for better +readability. If the transition table is bigger than PAGE_SIZE, reading this will return an -EFBIG error. -- cgit v1.2.3-59-g8ed1b From 6ef28a04d1ccf718eee069b72132ce4aa1e52ab9 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 5 Nov 2018 00:59:28 +0000 Subject: cpufreq: imx6q: add return value check for voltage scale Add return value check for voltage scale when ARM clock rate change fail. Signed-off-by: Anson Huang Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/imx6q-cpufreq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 8cfee0ab804b..d8c3595e9023 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -160,8 +160,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) /* Ensure the arm clock divider is what we expect */ ret = clk_set_rate(clks[ARM].clk, new_freq * 1000); if (ret) { + int ret1; + dev_err(cpu_dev, "failed to set clock rate: %d\n", ret); - regulator_set_voltage_tol(arm_reg, volt_old, 0); + ret1 = regulator_set_voltage_tol(arm_reg, volt_old, 0); + if (ret1) + dev_warn(cpu_dev, + "failed to restore vddarm voltage: %d\n", ret1); return ret; } -- cgit v1.2.3-59-g8ed1b From e531efa1e92b888acce45785b3ae69278fa712c0 Mon Sep 17 00:00:00 2001 From: Zhao Wei Liew Date: Thu, 1 Nov 2018 01:32:46 +0000 Subject: Documentation: cpufreq: Correct a typo Fix a typo in the admin-guide documentation for cpufreq. Signed-off-by: Zhao Wei Liew Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/cpufreq.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst index 47153e64dfb5..7eca9026a9ed 100644 --- a/Documentation/admin-guide/pm/cpufreq.rst +++ b/Documentation/admin-guide/pm/cpufreq.rst @@ -150,7 +150,7 @@ data structures necessary to handle the given policy and, possibly, to add a governor ``sysfs`` interface to it. Next, the governor is started by invoking its ``->start()`` callback. -That callback it expected to register per-CPU utilization update callbacks for +That callback is expected to register per-CPU utilization update callbacks for all of the online CPUs belonging to the given policy with the CPU scheduler. The utilization update callbacks will be invoked by the CPU scheduler on important events, like task enqueue and dequeue, on every iteration of the -- cgit v1.2.3-59-g8ed1b From fbb974ba693bbfb4e24a62181ef16d4e45febc37 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 4 Sep 2018 16:51:29 +0200 Subject: rtc: cmos: Do not export alarm rtc_ops when we do not support alarms When there is no IRQ configured for the RTC, the rtc-cmos code does not support alarms, all alarm rtc_ops fail with -EIO / -EINVAL. The rtc-core expects a rtc driver which does not support rtc alarms to not have alarm ops at all. Otherwise the wakealarm sysfs attr will read as empty rather then returning an error, making it impossible for userspace to find out beforehand if alarms are supported. A system without an IRQ for the RTC before this patch: [root@localhost ~]# cat /sys/class/rtc/rtc0/wakealarm [root@localhost ~]# After this patch: [root@localhost ~]# cat /sys/class/rtc/rtc0/wakealarm cat: /sys/class/rtc/rtc0/wakealarm: No such file or directory [root@localhost ~]# This fixes gnome-session + systemd trying to use suspend-then-hibernate, which causes systemd to abort the suspend when writing the RTC alarm fails. BugLink: https://github.com/systemd/systemd/issues/9988 Signed-off-by: Hans de Goede Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cmos.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index df0c5776d49b..a5a19ff10535 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -257,6 +257,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t) struct cmos_rtc *cmos = dev_get_drvdata(dev); unsigned char rtc_control; + /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) return -EIO; @@ -452,6 +453,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) unsigned char mon, mday, hrs, min, sec, rtc_control; int ret; + /* This not only a rtc_op, but also called directly */ if (!is_valid_irq(cmos->irq)) return -EIO; @@ -516,9 +518,6 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled) struct cmos_rtc *cmos = dev_get_drvdata(dev); unsigned long flags; - if (!is_valid_irq(cmos->irq)) - return -EINVAL; - spin_lock_irqsave(&rtc_lock, flags); if (enabled) @@ -579,6 +578,12 @@ static const struct rtc_class_ops cmos_rtc_ops = { .alarm_irq_enable = cmos_alarm_irq_enable, }; +static const struct rtc_class_ops cmos_rtc_ops_no_alarm = { + .read_time = cmos_read_time, + .set_time = cmos_set_time, + .proc = cmos_procfs, +}; + /*----------------------------------------------------------------*/ /* @@ -855,9 +860,12 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq); goto cleanup1; } + + cmos_rtc.rtc->ops = &cmos_rtc_ops; + } else { + cmos_rtc.rtc->ops = &cmos_rtc_ops_no_alarm; } - cmos_rtc.rtc->ops = &cmos_rtc_ops; cmos_rtc.rtc->nvram_old_abi = true; retval = rtc_register_device(cmos_rtc.rtc); if (retval) -- cgit v1.2.3-59-g8ed1b From 7ce9a992ffde8ce93d5ae5767362a5c7389ae895 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 5 Nov 2018 03:48:25 +0000 Subject: rtc: hctosys: Add missing range error reporting Fix an issue with the 32-bit range error path in `rtc_hctosys' where no error code is set and consequently the successful preceding call result from `rtc_read_time' is propagated to `rtc_hctosys_ret'. This in turn makes any subsequent call to `hctosys_show' incorrectly report in sysfs that the system time has been set from this RTC while it has not. Set the error to ERANGE then if we can't express the result due to an overflow. Signed-off-by: Maciej W. Rozycki Fixes: b3a5ac42ab18 ("rtc: hctosys: Ensure system time doesn't overflow time_t") Cc: stable@vger.kernel.org # 4.17+ Signed-off-by: Alexandre Belloni --- drivers/rtc/hctosys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c index e79f2a181ad2..b9ec4a16db1f 100644 --- a/drivers/rtc/hctosys.c +++ b/drivers/rtc/hctosys.c @@ -50,8 +50,10 @@ static int __init rtc_hctosys(void) tv64.tv_sec = rtc_tm_to_time64(&tm); #if BITS_PER_LONG == 32 - if (tv64.tv_sec > INT_MAX) + if (tv64.tv_sec > INT_MAX) { + err = -ERANGE; goto err_read; + } #endif err = do_settimeofday64(&tv64); -- cgit v1.2.3-59-g8ed1b From 9bde0afb7a906f1dabdba37162551565740b862d Mon Sep 17 00:00:00 2001 From: Xulin Sun Date: Tue, 6 Nov 2018 16:42:19 +0800 Subject: rtc: pcf2127: fix a kmemleak caused in pcf2127_i2c_gather_write pcf2127_i2c_gather_write() allocates memory as local variable for i2c_master_send(), after finishing the master transfer, the allocated memory should be freed. The kmemleak is reported: unreferenced object 0xffff80231e7dba80 (size 64): comm "hwclock", pid 27762, jiffies 4296880075 (age 356.944s) hex dump (first 32 bytes): 03 00 12 03 19 02 11 13 00 80 98 18 00 00 ff ff ................ 00 50 00 00 00 00 00 00 02 00 00 00 00 00 00 00 .P.............. backtrace: [] create_object+0xf8/0x278 [] kmemleak_alloc+0x74/0xa0 [] __kmalloc+0x1ac/0x348 [] pcf2127_i2c_gather_write+0x54/0xf8 [] _regmap_raw_write+0x464/0x850 [] regmap_bulk_write+0x1a4/0x348 [] pcf2127_rtc_set_time+0xac/0xe8 [] rtc_set_time+0x80/0x138 [] rtc_dev_ioctl+0x398/0x610 [] do_vfs_ioctl+0xb0/0x848 [] SyS_ioctl+0x8c/0xa8 [] el0_svc_naked+0x34/0x38 [] 0xffffffffffffffff Signed-off-by: Xulin Sun Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-pcf2127.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9f99a0966550..7cb786d76e3c 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -303,6 +303,9 @@ static int pcf2127_i2c_gather_write(void *context, memcpy(buf + 1, val, val_size); ret = i2c_master_send(client, buf, val_size + 1); + + kfree(buf); + if (ret != val_size + 1) return ret < 0 ? ret : -EIO; -- cgit v1.2.3-59-g8ed1b From cf3d02a185edf449d30465dae8d22a4979545829 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Wed, 7 Nov 2018 15:55:33 -0500 Subject: drm: Fix htmldocs warnings in drm_fourcc.c Add a description for dev and remove the excess one for native. Fixes the following warnings: ../drivers/gpu/drm/drm_fourcc.c:112: warning: Function parameter or member 'dev' not described in 'drm_driver_legacy_fb_format' ../drivers/gpu/drm/drm_fourcc.c:112: warning: Excess function parameter 'native' description in 'drm_driver_legacy_fb_format' Fixes: 059b5eb5d955 ("drm: move native byte order quirk to new drm_driver_legacy_fb_format function") Cc: Gerd Hoffmann Cc: Daniel Vetter Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20181107205546.216088-1-sean@poorly.run --- drivers/gpu/drm/drm_fourcc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 90a1c846fc25..8aaa5e86a979 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -97,9 +97,9 @@ EXPORT_SYMBOL(drm_mode_legacy_fb_format); /** * drm_driver_legacy_fb_format - compute drm fourcc code from legacy description + * @dev: DRM device * @bpp: bits per pixels * @depth: bit depth per pixel - * @native: use host native byte order * * Computes a drm fourcc pixel format code for the given @bpp/@depth values. * Unlike drm_mode_legacy_fb_format() this looks at the drivers mode_config, -- cgit v1.2.3-59-g8ed1b From b469e7e47c8a075cc08bcd1e85d4365134bdcdd5 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 30 Oct 2018 20:29:53 +0200 Subject: fanotify: fix handling of events on child sub-directory When an event is reported on a sub-directory and the parent inode has a mark mask with FS_EVENT_ON_CHILD|FS_ISDIR, the event will be sent to fsnotify() even if the event type is not in the parent mark mask (e.g. FS_OPEN). Further more, if that event happened on a mount or a filesystem with a mount/sb mark that does have that event type in their mask, the "on child" event will be reported on the mount/sb mark. That is not desired, because user will get a duplicate event for the same action. Note that the event reported on the victim inode is never merged with the event reported on the parent inode, because of the check in should_merge(): old_fsn->inode == new_fsn->inode. Fix this by looking for a match of an actual event type (i.e. not just FS_ISDIR) in parent's inode mark mask and by not reporting an "on child" event to group if event type is only found on mount/sb marks. [backport hint: The bug seems to have always been in fanotify, but this patch will only apply cleanly to v4.19.y] Cc: # v4.19 Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify.c | 10 +++++----- fs/notify/fsnotify.c | 7 +++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5769cf3ff035..e08a6647267b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -115,12 +115,12 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, continue; mark = iter_info->marks[type]; /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! + * If the event is for a child and this mark doesn't care about + * events on a child, don't send it! */ - if (type == FSNOTIFY_OBJ_TYPE_INODE && - (event_mask & FS_EVENT_ON_CHILD) && - !(mark->mask & FS_EVENT_ON_CHILD)) + if (event_mask & FS_EVENT_ON_CHILD && + (type != FSNOTIFY_OBJ_TYPE_INODE || + !(mark->mask & FS_EVENT_ON_CHILD))) continue; marks_mask |= mark->mask; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2172ba516c61..d2c34900ae05 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -167,9 +167,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask parent = dget_parent(dentry); p_inode = parent->d_inode; - if (unlikely(!fsnotify_inode_watches_children(p_inode))) + if (unlikely(!fsnotify_inode_watches_children(p_inode))) { __fsnotify_update_child_dentry_flags(p_inode); - else if (p_inode->i_fsnotify_mask & mask) { + } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) { struct name_snapshot name; /* we are notifying a parent so come up with the new mask which @@ -339,6 +339,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, sb = mnt->mnt.mnt_sb; mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; } + /* An event "on child" is not intended for a mount/sb mark */ + if (mask & FS_EVENT_ON_CHILD) + mnt_or_sb_mask = 0; /* * Optimization: srcu_read_lock() has a memory barrier which can -- cgit v1.2.3-59-g8ed1b From 01310bb7c9c98752cc763b36532fab028e0f8f81 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 8 Nov 2018 11:11:36 -0500 Subject: nfsd: COPY and CLONE operations require the saved filehandle to be set Make sure we have a saved filehandle, otherwise we'll oops with a null pointer dereference in nfs4_preprocess_stateid_op(). Signed-off-by: Scott Mayhew Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index edff074d38c7..d505990dac7c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1038,6 +1038,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; + if (!cstate->save_fh.fh_dentry) + return nfserr_nofilehandle; + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, src_stateid, RD_STATE, src, NULL); if (status) { -- cgit v1.2.3-59-g8ed1b From 025911a5f4e36955498ed50806ad1b02f0f76288 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 8 Nov 2018 02:04:57 +0000 Subject: SUNRPC: drop pointless static qualifier in xdr_get_next_encode_buffer() There is no need to have the '__be32 *p' variable static since new value always be assigned before use it. Signed-off-by: YueHaibing Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- net/sunrpc/xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 5cfb9e0a18dc..f302c6eb8779 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode); static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) { - static __be32 *p; + __be32 *p; int space_left; int frag1bytes, frag2bytes; -- cgit v1.2.3-59-g8ed1b From 017ce359a7187192df5222a00fa3c9055eb3736d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Nov 2018 12:06:43 +0100 Subject: ACPI / PMIC: xpower: fix IOSF_MBI dependency We still get a link failure with IOSF_MBI=m when the xpower driver is built-in: drivers/acpi/pmic/intel_pmic_xpower.o: In function `intel_xpower_pmic_update_power': intel_pmic_xpower.c:(.text+0x4f2): undefined reference to `iosf_mbi_block_punit_i2c_access' intel_pmic_xpower.c:(.text+0x5e2): undefined reference to `iosf_mbi_unblock_punit_i2c_access' This makes the dependency stronger, so we can only build when IOSF_MBI is built-in. Fixes: 6a9b593d4b6f (ACPI / PMIC: xpower: Add depends on IOSF_MBI to Kconfig entry) Signed-off-by: Arnd Bergmann Signed-off-by: Rafael J. Wysocki --- drivers/acpi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 8f3a444c6ea9..7cea769c37df 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -512,7 +512,7 @@ config CRC_PMIC_OPREGION config XPOWER_PMIC_OPREGION bool "ACPI operation region support for XPower AXP288 PMIC" - depends on MFD_AXP20X_I2C && IOSF_MBI + depends on MFD_AXP20X_I2C && IOSF_MBI=y help This config adds ACPI operation region support for XPower AXP288 PMIC. -- cgit v1.2.3-59-g8ed1b From aeaf6a4b2d9ed4373e39a64c1c10cb1d93dd6bd1 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 7 Nov 2018 17:40:58 +0000 Subject: dt-bindings: cpufreq: remove stale arm_big_little_dt entry Most of the ARM platforms used v2 OPP bindings to support big-little configurations. This arm_big_little_dt binding is incomplete and was never used. Commit f174e49e4906 (cpufreq: remove unused arm_big_little_dt driver) removed the driver supporting this binding, but the binding was left unnoticed, so let's get rid of it now. Fixes: f174e49e4906 (cpufreq: remove unused arm_big_little_dt driver) Signed-off-by: Sudeep Holla Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- .../bindings/cpufreq/arm_big_little_dt.txt | 65 ---------------------- 1 file changed, 65 deletions(-) delete mode 100644 Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt diff --git a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt b/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt deleted file mode 100644 index 2aa06ac0fac5..000000000000 --- a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt +++ /dev/null @@ -1,65 +0,0 @@ -Generic ARM big LITTLE cpufreq driver's DT glue ------------------------------------------------ - -This is DT specific glue layer for generic cpufreq driver for big LITTLE -systems. - -Both required and optional properties listed below must be defined -under node /cpus/cpu@x. Where x is the first cpu inside a cluster. - -FIXME: Cpus should boot in the order specified in DT and all cpus for a cluster -must be present contiguously. Generic DT driver will check only node 'x' for -cpu:x. - -Required properties: -- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt - for details - -Optional properties: -- clock-latency: Specify the possible maximum transition latency for clock, - in unit of nanoseconds. - -Examples: - -cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu@0 { - compatible = "arm,cortex-a15"; - reg = <0>; - next-level-cache = <&L2>; - operating-points = < - /* kHz uV */ - 792000 1100000 - 396000 950000 - 198000 850000 - >; - clock-latency = <61036>; /* two CLK32 periods */ - }; - - cpu@1 { - compatible = "arm,cortex-a15"; - reg = <1>; - next-level-cache = <&L2>; - }; - - cpu@100 { - compatible = "arm,cortex-a7"; - reg = <100>; - next-level-cache = <&L2>; - operating-points = < - /* kHz uV */ - 792000 950000 - 396000 750000 - 198000 450000 - >; - clock-latency = <61036>; /* two CLK32 periods */ - }; - - cpu@101 { - compatible = "arm,cortex-a7"; - reg = <101>; - next-level-cache = <&L2>; - }; -}; -- cgit v1.2.3-59-g8ed1b From 763f191af51f127cf8e69cd361f50bf6180768a5 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 1 Nov 2018 13:22:38 +0100 Subject: ARM: cpuidle: Don't register the driver when back-end init returns -ENXIO There's no point to register the cpuidle driver for the current CPU, when the initialization of the arch specific back-end data fails by returning -ENXIO. Instead, let's re-order the sequence to its original flow, by first trying to initialize the back-end part and then act accordingly on the returned error code. Additionally, let's print the error message, no matter of what error code that was returned. Fixes: a0d46a3dfdc3 (ARM: cpuidle: Register per cpuidle device) Signed-off-by: Ulf Hansson Reviewed-by: Daniel Lezcano Cc: 4.19+ # v4.19+ Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-arm.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index 073557f433eb..df564d783216 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -103,13 +103,6 @@ static int __init arm_idle_init_cpu(int cpu) goto out_kfree_drv; } - ret = cpuidle_register_driver(drv); - if (ret) { - if (ret != -EBUSY) - pr_err("Failed to register cpuidle driver\n"); - goto out_kfree_drv; - } - /* * Call arch CPU operations in order to initialize * idle states suspend back-end specific data @@ -117,15 +110,20 @@ static int __init arm_idle_init_cpu(int cpu) ret = arm_cpuidle_init(cpu); /* - * Skip the cpuidle device initialization if the reported + * Allow the initialization to continue for other CPUs, if the reported * failure is a HW misconfiguration/breakage (-ENXIO). */ - if (ret == -ENXIO) - return 0; - if (ret) { pr_err("CPU %d failed to init idle CPU ops\n", cpu); - goto out_unregister_drv; + ret = ret == -ENXIO ? 0 : ret; + goto out_kfree_drv; + } + + ret = cpuidle_register_driver(drv); + if (ret) { + if (ret != -EBUSY) + pr_err("Failed to register cpuidle driver\n"); + goto out_kfree_drv; } dev = kzalloc(sizeof(*dev), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b From 3e452e636d006fa759a9914c044398869acba98f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 1 Nov 2018 11:15:58 +0100 Subject: ARM: cpuidle: Convert to use cpuidle_register|unregister() The only reason that remains, to why the ARM cpuidle driver calls cpuidle_register_driver(), is to avoid printing an error message in case another driver already have been registered for the CPU. This seems a bit silly, but more importantly, if that is a common scenario, perhaps we should change cpuidle_register() accordingly instead. In either case, let's consolidate the code, by converting to use cpuidle_register|unregister(), which also avoids the unnecessary allocation of the struct cpuidle_device. Signed-off-by: Ulf Hansson Reviewed-by: Lorenzo Pieralisi Reviewed-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-arm.c | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index df564d783216..3a407a3ef22b 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -82,7 +82,6 @@ static int __init arm_idle_init_cpu(int cpu) { int ret; struct cpuidle_driver *drv; - struct cpuidle_device *dev; drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL); if (!drv) @@ -119,33 +118,12 @@ static int __init arm_idle_init_cpu(int cpu) goto out_kfree_drv; } - ret = cpuidle_register_driver(drv); - if (ret) { - if (ret != -EBUSY) - pr_err("Failed to register cpuidle driver\n"); + ret = cpuidle_register(drv, NULL); + if (ret) goto out_kfree_drv; - } - - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { - ret = -ENOMEM; - goto out_unregister_drv; - } - dev->cpu = cpu; - - ret = cpuidle_register_device(dev); - if (ret) { - pr_err("Failed to register cpuidle device for CPU %d\n", - cpu); - goto out_kfree_dev; - } return 0; -out_kfree_dev: - kfree(dev); -out_unregister_drv: - cpuidle_unregister_driver(drv); out_kfree_drv: kfree(drv); return ret; @@ -176,9 +154,7 @@ out_fail: while (--cpu >= 0) { dev = per_cpu(cpuidle_devices, cpu); drv = cpuidle_get_cpu_driver(dev); - cpuidle_unregister_device(dev); - cpuidle_unregister_driver(drv); - kfree(dev); + cpuidle_unregister(drv); kfree(drv); } -- cgit v1.2.3-59-g8ed1b From 68a031d22c57b94870ba13513c9d93b8a8119ab2 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 5 Nov 2018 20:35:14 +0800 Subject: crypto: hisilicon - Fix NULL dereference for same dst and src When the source and destination addresses for the cipher are the same, we will get a NULL dereference from accessing the split destination scatterlist memories, as shown: [ 56.565719] tcrypt: [ 56.565719] testing speed of async ecb(aes) (hisi_sec_aes_ecb) encryption [ 56.574683] tcrypt: test 0 (128 bit key, 16 byte blocks): [ 56.587585] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [ 56.596361] Mem abort info: [ 56.599151] ESR = 0x96000006 [ 56.602196] Exception class = DABT (current EL), IL = 32 bits [ 56.608105] SET = 0, FnV = 0 [ 56.611149] EA = 0, S1PTW = 0 [ 56.614280] Data abort info: [ 56.617151] ISV = 0, ISS = 0x00000006 [ 56.620976] CM = 0, WnR = 0 [ 56.623930] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____) [ 56.630533] [0000000000000000] pgd=0000041fc7e4d003, pud=0000041fcd9bf003, pmd=0000000000000000 [ 56.639224] Internal error: Oops: 96000006 [#1] PREEMPT SMP [ 56.644782] Modules linked in: tcrypt(+) [ 56.648695] CPU: 21 PID: 2326 Comm: insmod Tainted: G W 4.19.0-rc6-00001-g3fabfb8-dirty #716 [ 56.658420] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT17 Nemo 2.0 RC0 10/05/2018 [ 56.667537] pstate: 20000005 (nzCv daif -PAN -UAO) [ 56.672322] pc : sec_alg_skcipher_crypto+0x318/0x748 [ 56.677274] lr : sec_alg_skcipher_crypto+0x178/0x748 [ 56.682224] sp : ffff0000118e3840 [ 56.685525] x29: ffff0000118e3840 x28: ffff841fbb3f8118 [ 56.690825] x27: 0000000000000000 x26: 0000000000000000 [ 56.696125] x25: ffff841fbb3f8080 x24: ffff841fbadc0018 [ 56.701425] x23: ffff000009119000 x22: ffff841fbb24e280 [ 56.706724] x21: ffff841ff212e780 x20: ffff841ff212e700 [ 56.712023] x19: 0000000000000001 x18: ffffffffffffffff [ 56.717322] x17: 0000000000000000 x16: 0000000000000000 [ 56.722621] x15: ffff0000091196c8 x14: 72635f7265687069 [ 56.727920] x13: 636b735f676c615f x12: ffff000009119940 [ 56.733219] x11: 0000000000000000 x10: 00000000006080c0 [ 56.738519] x9 : 0000000000000000 x8 : ffff841fbb24e480 [ 56.743818] x7 : ffff841fbb24e500 x6 : ffff841ff00cdcc0 [ 56.749117] x5 : 0000000000000010 x4 : 0000000000000000 [ 56.754416] x3 : ffff841fbb24e380 x2 : ffff841fbb24e480 [ 56.759715] x1 : 0000000000000000 x0 : ffff000008f682c8 [ 56.765016] Process insmod (pid: 2326, stack limit = 0x(____ptrval____)) [ 56.771702] Call trace: [ 56.774136] sec_alg_skcipher_crypto+0x318/0x748 [ 56.778740] sec_alg_skcipher_encrypt+0x10/0x18 [ 56.783259] test_skcipher_speed+0x2a0/0x700 [tcrypt] [ 56.788298] do_test+0x18f8/0x48c8 [tcrypt] [ 56.792469] tcrypt_mod_init+0x60/0x1000 [tcrypt] [ 56.797161] do_one_initcall+0x5c/0x178 [ 56.800985] do_init_module+0x58/0x1b4 [ 56.804721] load_module+0x1da4/0x2150 [ 56.808456] __se_sys_init_module+0x14c/0x1e8 [ 56.812799] __arm64_sys_init_module+0x18/0x20 [ 56.817231] el0_svc_common+0x60/0xe8 [ 56.820880] el0_svc_handler+0x2c/0x80 [ 56.824615] el0_svc+0x8/0xc [ 56.827483] Code: a94c87a3 910b2000 f87b7842 f9004ba2 (b87b7821) [ 56.833564] ---[ end trace 0f63290590e93d94 ]--- Segmentation fault Fix this by only accessing these memories when we have different src and dst. Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver") Reviewed-by: Jonathan Cameron Cc: Signed-off-by: John Garry Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec/sec_algs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index f7d6d690116e..32c6c02cb9ae 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c +++ b/drivers/crypto/hisilicon/sec/sec_algs.c @@ -732,6 +732,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, int *splits_in_nents; int *splits_out_nents = NULL; struct sec_request_el *el, *temp; + bool split = skreq->src != skreq->dst; mutex_init(&sec_req->lock); sec_req->req_base = &skreq->base; @@ -750,7 +751,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, if (ret) goto err_free_split_sizes; - if (skreq->src != skreq->dst) { + if (split) { sec_req->len_out = sg_nents(skreq->dst); ret = sec_map_and_split_sg(skreq->dst, split_sizes, steps, &splits_out, &splits_out_nents, @@ -785,8 +786,9 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, split_sizes[i], skreq->src != skreq->dst, splits_in[i], splits_in_nents[i], - splits_out[i], - splits_out_nents[i], info); + split ? splits_out[i] : NULL, + split ? splits_out_nents[i] : 0, + info); if (IS_ERR(el)) { ret = PTR_ERR(el); goto err_free_elements; @@ -854,7 +856,7 @@ err_free_elements: crypto_skcipher_ivsize(atfm), DMA_BIDIRECTIONAL); err_unmap_out_sg: - if (skreq->src != skreq->dst) + if (split) sec_unmap_sg_on_err(skreq->dst, steps, splits_out, splits_out_nents, sec_req->len_out, info->dev); -- cgit v1.2.3-59-g8ed1b From 0b0cf6af3f3151c26c27e8e51def5527091c3e69 Mon Sep 17 00:00:00 2001 From: John Garry Date: Mon, 5 Nov 2018 20:35:15 +0800 Subject: crypto: hisilicon - Fix reference after free of memories on error path coccicheck currently warns of the following issues in the driver: drivers/crypto/hisilicon/sec/sec_algs.c:864:51-66: ERROR: reference preceded by free on line 812 drivers/crypto/hisilicon/sec/sec_algs.c:864:40-49: ERROR: reference preceded by free on line 813 drivers/crypto/hisilicon/sec/sec_algs.c:861:8-24: ERROR: reference preceded by free on line 814 drivers/crypto/hisilicon/sec/sec_algs.c:860:41-51: ERROR: reference preceded by free on line 815 drivers/crypto/hisilicon/sec/sec_algs.c:867:7-18: ERROR: reference preceded by free on line 816 It would appear than on certain error paths that we may attempt reference- after-free some memories. This patch fixes those issues. The solution doesn't look perfect, but having same memories free'd possibly from separate functions makes it tricky. Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver") Reviewed-by: Jonathan Cameron Cc: Signed-off-by: John Garry Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec/sec_algs.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index 32c6c02cb9ae..cdc4f9a171d9 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c +++ b/drivers/crypto/hisilicon/sec/sec_algs.c @@ -808,13 +808,6 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, * more refined but this is unlikely to happen so no need. */ - /* Cleanup - all elements in pointer arrays have been coppied */ - kfree(splits_in_nents); - kfree(splits_in); - kfree(splits_out_nents); - kfree(splits_out); - kfree(split_sizes); - /* Grab a big lock for a long time to avoid concurrency issues */ mutex_lock(&queue->queuelock); @@ -829,13 +822,13 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, (!queue->havesoftqueue || kfifo_avail(&queue->softqueue) > steps)) || !list_empty(&ctx->backlog)) { + ret = -EBUSY; if ((skreq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { list_add_tail(&sec_req->backlog_head, &ctx->backlog); mutex_unlock(&queue->queuelock); - return -EBUSY; + goto out; } - ret = -EBUSY; mutex_unlock(&queue->queuelock); goto err_free_elements; } @@ -844,7 +837,15 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, if (ret) goto err_free_elements; - return -EINPROGRESS; + ret = -EINPROGRESS; +out: + /* Cleanup - all elements in pointer arrays have been copied */ + kfree(splits_in_nents); + kfree(splits_in); + kfree(splits_out_nents); + kfree(splits_out); + kfree(split_sizes); + return ret; err_free_elements: list_for_each_entry_safe(el, temp, &sec_req->elements, head) { -- cgit v1.2.3-59-g8ed1b From 508a1c4df085a547187eed346f1bfe5e381797f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 8 Nov 2018 23:55:16 +0100 Subject: crypto: simd - correctly take reqsize of wrapped skcipher into account The simd wrapper's skcipher request context structure consists of a single subrequest whose size is taken from the subordinate skcipher. However, in simd_skcipher_init(), the reqsize that is retrieved is not from the subordinate skcipher but from the cryptd request structure, whose size is completely unrelated to the actual wrapped skcipher. Reported-by: Qian Cai Signed-off-by: Ard Biesheuvel Tested-by: Qian Cai Signed-off-by: Herbert Xu --- crypto/simd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/simd.c b/crypto/simd.c index ea7240be3001..78e8d037ae2b 100644 --- a/crypto/simd.c +++ b/crypto/simd.c @@ -124,8 +124,9 @@ static int simd_skcipher_init(struct crypto_skcipher *tfm) ctx->cryptd_tfm = cryptd_tfm; - reqsize = sizeof(struct skcipher_request); - reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base); + reqsize = crypto_skcipher_reqsize(cryptd_skcipher_child(cryptd_tfm)); + reqsize = max(reqsize, crypto_skcipher_reqsize(&cryptd_tfm->base)); + reqsize += sizeof(struct skcipher_request); crypto_skcipher_set_reqsize(tfm, reqsize); -- cgit v1.2.3-59-g8ed1b From f43f39958beb206b53292801e216d9b8a660f087 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 3 Nov 2018 14:56:00 -0700 Subject: crypto: user - fix leaking uninitialized memory to userspace All bytes of the NETLINK_CRYPTO report structures must be initialized, since they are copied to userspace. The change from strncpy() to strlcpy() broke this. As a minimal fix, change it back. Fixes: 4473710df1f8 ("crypto: user - Prepare for CRYPTO_MAX_ALG_NAME expansion") Cc: # v4.12+ Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/crypto_user_base.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c index e41f6cc33fff..784748dbb19f 100644 --- a/crypto/crypto_user_base.c +++ b/crypto/crypto_user_base.c @@ -84,7 +84,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_cipher rcipher; - strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); + strncpy(rcipher.type, "cipher", sizeof(rcipher.type)); rcipher.blocksize = alg->cra_blocksize; rcipher.min_keysize = alg->cra_cipher.cia_min_keysize; @@ -103,7 +103,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rcomp; - strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); + strncpy(rcomp.type, "compression", sizeof(rcomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(struct crypto_report_comp), &rcomp)) goto nla_put_failure; @@ -117,7 +117,7 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_acomp racomp; - strlcpy(racomp.type, "acomp", sizeof(racomp.type)); + strncpy(racomp.type, "acomp", sizeof(racomp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(struct crypto_report_acomp), &racomp)) @@ -132,7 +132,7 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_akcipher rakcipher; - strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); + strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER, sizeof(struct crypto_report_akcipher), &rakcipher)) @@ -147,7 +147,7 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_kpp rkpp; - strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); + strncpy(rkpp.type, "kpp", sizeof(rkpp.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(struct crypto_report_kpp), &rkpp)) @@ -161,10 +161,10 @@ nla_put_failure: static int crypto_report_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { - strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); - strlcpy(ualg->cru_driver_name, alg->cra_driver_name, + strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); + strncpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); - strlcpy(ualg->cru_module_name, module_name(alg->cra_module), + strncpy(ualg->cru_module_name, module_name(alg->cra_module), sizeof(ualg->cru_module_name)); ualg->cru_type = 0; @@ -177,7 +177,7 @@ static int crypto_report_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_report_larval rl; - strlcpy(rl.type, "larval", sizeof(rl.type)); + strncpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(struct crypto_report_larval), &rl)) goto nla_put_failure; -- cgit v1.2.3-59-g8ed1b From 9f4debe38415583086ce814798eeb864aeb39551 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Sat, 3 Nov 2018 14:56:01 -0700 Subject: crypto: user - Zeroize whole structure given to user space For preventing uninitialized data to be given to user-space (and so leak potential useful data), the crypto_stat structure must be correctly initialized. Reported-by: Dan Carpenter Fixes: cac5818c25d0 ("crypto: user - Implement a generic crypto statistics") Signed-off-by: Corentin Labbe [EB: also fix it in crypto_reportstat_one()] [EB: use sizeof(var) rather than sizeof(type)] Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/crypto_user_stat.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c index 021ad06bbb62..1dfaa0ccd555 100644 --- a/crypto/crypto_user_stat.c +++ b/crypto/crypto_user_stat.c @@ -37,6 +37,8 @@ static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&raead, 0, sizeof(raead)); + strncpy(raead.type, "aead", sizeof(raead.type)); v32 = atomic_read(&alg->encrypt_cnt); @@ -65,6 +67,8 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rcipher, 0, sizeof(rcipher)); + strlcpy(rcipher.type, "cipher", sizeof(rcipher.type)); v32 = atomic_read(&alg->encrypt_cnt); @@ -93,6 +97,8 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rcomp, 0, sizeof(rcomp)); + strlcpy(rcomp.type, "compression", sizeof(rcomp.type)); v32 = atomic_read(&alg->compress_cnt); rcomp.stat_compress_cnt = v32; @@ -120,6 +126,8 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&racomp, 0, sizeof(racomp)); + strlcpy(racomp.type, "acomp", sizeof(racomp.type)); v32 = atomic_read(&alg->compress_cnt); racomp.stat_compress_cnt = v32; @@ -147,6 +155,8 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rakcipher, 0, sizeof(rakcipher)); + strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type)); v32 = atomic_read(&alg->encrypt_cnt); rakcipher.stat_encrypt_cnt = v32; @@ -177,6 +187,8 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg) struct crypto_stat rkpp; u32 v; + memset(&rkpp, 0, sizeof(rkpp)); + strlcpy(rkpp.type, "kpp", sizeof(rkpp.type)); v = atomic_read(&alg->setsecret_cnt); @@ -203,6 +215,8 @@ static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rhash, 0, sizeof(rhash)); + strncpy(rhash.type, "ahash", sizeof(rhash.type)); v32 = atomic_read(&alg->hash_cnt); @@ -227,6 +241,8 @@ static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rhash, 0, sizeof(rhash)); + strncpy(rhash.type, "shash", sizeof(rhash.type)); v32 = atomic_read(&alg->hash_cnt); @@ -251,6 +267,8 @@ static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg) u64 v64; u32 v32; + memset(&rrng, 0, sizeof(rrng)); + strncpy(rrng.type, "rng", sizeof(rrng.type)); v32 = atomic_read(&alg->generate_cnt); @@ -275,6 +293,8 @@ static int crypto_reportstat_one(struct crypto_alg *alg, struct crypto_user_alg *ualg, struct sk_buff *skb) { + memset(ualg, 0, sizeof(*ualg)); + strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name)); strlcpy(ualg->cru_driver_name, alg->cra_driver_name, sizeof(ualg->cru_driver_name)); @@ -291,6 +311,7 @@ static int crypto_reportstat_one(struct crypto_alg *alg, if (alg->cra_flags & CRYPTO_ALG_LARVAL) { struct crypto_stat rl; + memset(&rl, 0, sizeof(rl)); strlcpy(rl.type, "larval", sizeof(rl.type)); if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, sizeof(struct crypto_stat), &rl)) -- cgit v1.2.3-59-g8ed1b From 595b0674ce781e38522097b18718ce3c3bffc1a1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 5 Nov 2018 18:33:34 +0200 Subject: MAINTAINERS: Add tree link for Intel pin control driver Intel pin control driver gets its own tree. Update MAINTAINERS accordingly. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f4855974f325..570a8c593b52 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11730,6 +11730,7 @@ F: Documentation/devicetree/bindings/pinctrl/fsl,* PIN CONTROLLER - INTEL M: Mika Westerberg M: Andy Shevchenko +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git S: Maintained F: drivers/pinctrl/intel/ -- cgit v1.2.3-59-g8ed1b From 10283ea525d30f2e99828978fd04d8427876a7ad Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 5 Nov 2018 22:57:24 +0000 Subject: gfs2: Put bitmap buffers in put_super gfs2_put_super calls gfs2_clear_rgrpd to destroy the gfs2_rgrpd objects attached to the resource group glocks. That function should release the buffers attached to the gfs2_bitmap objects (bi_bh), but the call to gfs2_rgrp_brelse for doing that is missing. When gfs2_releasepage later runs across these buffers which are still referenced, it refuses to free them. This causes the pages the buffers are attached to to remain referenced as well. With enough mount/unmount cycles, the system will eventually run out of memory. Fix this by adding the missing call to gfs2_rgrp_brelse in gfs2_clear_rgrpd. (Also fix a gfs2_rgrp_relse -> gfs2_rgrp_brelse typo in a comment.) Fixes: 39b0f1e92908 ("GFS2: Don't brelse rgrp buffer_heads every allocation") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/rgrp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ffe3032b1043..b08a530433ad 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -733,6 +733,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) if (gl) { glock_clear_object(gl, rgd); + gfs2_rgrp_brelse(rgd); gfs2_glock_put(gl); } @@ -1174,7 +1175,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * @rgd: the struct gfs2_rgrpd describing the RG to read in * * Read in all of a Resource Group's header and bitmap blocks. - * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. + * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps. * * Returns: errno */ -- cgit v1.2.3-59-g8ed1b From e7445ceddfc220c1aede6d42758a5acb8844e9c3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 8 Nov 2018 20:14:29 +0000 Subject: gfs2: Fix metadata read-ahead during truncate (2) The previous attempt to fix for metadata read-ahead during truncate was incorrect: for files with a height > 2 (1006989312 bytes with a block size of 4096 bytes), read-ahead requests were not being issued for some of the indirect blocks discovered while walking the metadata tree, leading to significant slow-downs when deleting large files. Fix that. In addition, only issue read-ahead requests in the first pass through the meta-data tree, while deallocating data blocks. Fixes: c3ce5aa9b0 ("gfs2: Fix metadata read-ahead during truncate") Cc: stable@vger.kernel.org # v4.16+ Signed-off-by: Andreas Gruenbacher --- fs/gfs2/bmap.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 5f3ea07ef5e2..38d88fcb6988 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1908,10 +1908,16 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) if (ret < 0) goto out; - /* issue read-ahead on metadata */ - if (mp.mp_aheight > 1) { - for (; ret > 1; ret--) { - metapointer_range(&mp, mp.mp_aheight - ret, + /* On the first pass, issue read-ahead on metadata. */ + if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) { + unsigned int height = mp.mp_aheight - 1; + + /* No read-ahead for data blocks. */ + if (mp.mp_aheight - 1 == strip_h) + height--; + + for (; height >= mp.mp_aheight - ret; height--) { + metapointer_range(&mp, height, start_list, start_aligned, end_list, end_aligned, &start, &end); -- cgit v1.2.3-59-g8ed1b From 7fabaf303458fcabb694999d6fa772cc13d4e217 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Nov 2018 15:52:16 +0100 Subject: fuse: fix leaked notify reply fuse_request_send_notify_reply() may fail if the connection was reset for some reason (e.g. fs was unmounted). Don't leak request reference in this case. Besides leaking memory, this resulted in fc->num_waiting not being decremented and hence fuse_wait_aborted() left in a hanging and unkillable state. Fixes: 2d45ba381a74 ("fuse: add retrieve request") Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Reported-and-tested-by: syzbot+6339eda9cb4ebbc4c37b@syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi Cc: #v2.6.36 --- fs/fuse/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ae813e609932..6fe330cc9709 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1768,8 +1768,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, req->in.args[1].size = total_len; err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique); - if (err) + if (err) { fuse_retrieve_end(fc, req); + fuse_put_request(fc, req); + } return err; } -- cgit v1.2.3-59-g8ed1b From 2d84a2d19b6150c6dbac1e6ebad9c82e4c123772 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 9 Nov 2018 15:52:16 +0100 Subject: fuse: fix possibly missed wake-up after abort In current fuse_drop_waiting() implementation it's possible that fuse_wait_aborted() will not be woken up in the unlikely case that fuse_abort_conn() + fuse_wait_aborted() runs in between checking fc->connected and calling atomic_dec(&fc->num_waiting). Do the atomic_dec_and_test() unconditionally, which also provides the necessary barrier against reordering with the fc->connected check. The explicit smp_mb() in fuse_wait_aborted() is not actually needed, since the spin_unlock() in fuse_abort_conn() provides the necessary RELEASE barrier after resetting fc->connected. However, this is not a performance sensitive path, and adding the explicit barrier makes it easier to document. Signed-off-by: Miklos Szeredi Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests") Cc: #v4.19 --- fs/fuse/dev.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6fe330cc9709..a5e516a40e7a 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -165,9 +165,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) static void fuse_drop_waiting(struct fuse_conn *fc) { - if (fc->connected) { - atomic_dec(&fc->num_waiting); - } else if (atomic_dec_and_test(&fc->num_waiting)) { + /* + * lockess check of fc->connected is okay, because atomic_dec_and_test() + * provides a memory barrier mached with the one in fuse_wait_aborted() + * to ensure no wake-up is missed. + */ + if (atomic_dec_and_test(&fc->num_waiting) && + !READ_ONCE(fc->connected)) { /* wake up aborters */ wake_up_all(&fc->blocked_waitq); } @@ -2221,6 +2225,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn); void fuse_wait_aborted(struct fuse_conn *fc) { + /* matches implicit memory barrier in fuse_drop_waiting() */ + smp_mb(); wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); } -- cgit v1.2.3-59-g8ed1b From ebacb81273599555a7a19f7754a1451206a5fc4f Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 9 Nov 2018 14:51:46 +0100 Subject: fuse: fix use-after-free in fuse_direct_IO() In async IO blocking case the additional reference to the io is taken for it to survive fuse_aio_complete(). In non blocking case this additional reference is not needed, however we still reference io to figure out whether to wait for completion or not. This is wrong and will lead to use-after-free. Fix it by storing blocking information in separate variable. This was spotted by KASAN when running generic/208 fstest. Signed-off-by: Lukas Czerner Reported-by: Zorro Lang Signed-off-by: Miklos Szeredi Fixes: 744742d692e3 ("fuse: Add reference counting for fuse_io_priv") Cc: # v4.6 --- fs/fuse/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cc2121b37bf5..b52f9baaa3e7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2924,10 +2924,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) } if (io->async) { + bool blocking = io->blocking; + fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (!io->blocking) + if (!blocking) return -EIOCBQUEUED; wait_for_completion(&wait); -- cgit v1.2.3-59-g8ed1b From a43608fa77213ad5ac5f75994254b9f65d57cfa0 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 24 Oct 2018 10:27:12 +0200 Subject: can: raw: check for CAN FD capable netdev in raw_sendmsg() When the socket is CAN FD enabled it can handle CAN FD frame transmissions. Add an additional check in raw_sendmsg() as a CAN2.0 CAN driver (non CAN FD) should never see a CAN FD frame. Due to the commonly used can_dropped_invalid_skb() function the CAN 2.0 driver would drop that CAN FD frame anyway - but with this patch the user gets a proper -EINVAL return code. Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- net/can/raw.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/can/raw.c b/net/can/raw.c index 1051eee82581..3aab7664933f 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -745,18 +745,19 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } else ifindex = ro->ifindex; - if (ro->fd_frames) { + dev = dev_get_by_index(sock_net(sk), ifindex); + if (!dev) + return -ENXIO; + + err = -EINVAL; + if (ro->fd_frames && dev->mtu == CANFD_MTU) { if (unlikely(size != CANFD_MTU && size != CAN_MTU)) - return -EINVAL; + goto put_dev; } else { if (unlikely(size != CAN_MTU)) - return -EINVAL; + goto put_dev; } - dev = dev_get_by_index(sock_net(sk), ifindex); - if (!dev) - return -ENXIO; - skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) -- cgit v1.2.3-59-g8ed1b From 95217260649aa504eb5d4a0d50959ca4e67c8f96 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Mon, 6 Aug 2018 15:14:50 +0200 Subject: can: kvaser_usb: Fix potential uninitialized variable use If alloc_can_err_skb() fails, cf is never initialized. Move assignment of cf inside check. Reported-by: Dan Carpenter Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index c084bae5ec0a..5fc0be564274 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -1019,6 +1019,11 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv, new_state : CAN_STATE_ERROR_ACTIVE; can_change_state(netdev, cf, tx_state, rx_state); + + if (priv->can.restart_ms && + old_state >= CAN_STATE_BUS_OFF && + new_state < CAN_STATE_BUS_OFF) + cf->can_id |= CAN_ERR_RESTARTED; } if (new_state == CAN_STATE_BUS_OFF) { @@ -1028,11 +1033,6 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv, can_bus_off(netdev); } - - if (priv->can.restart_ms && - old_state >= CAN_STATE_BUS_OFF && - new_state < CAN_STATE_BUS_OFF) - cf->can_id |= CAN_ERR_RESTARTED; } if (!skb) { -- cgit v1.2.3-59-g8ed1b From e13fb9b37cc00616b90df2d620f30345b5ada6ff Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Mon, 6 Aug 2018 15:14:49 +0200 Subject: can: kvaser_usb: Fix accessing freed memory in kvaser_usb_start_xmit() The call to can_put_echo_skb() may result in the skb being freed. The skb is later used in the call to dev->ops->dev_frame_to_cmd(). This is avoided by moving the call to can_put_echo_skb() after dev->ops->dev_frame_to_cmd(). Reported-by: Dan Carpenter Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index b939a4c10b84..c89c7d4900d7 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -528,7 +528,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, context = &priv->tx_contexts[i]; context->echo_index = i; - can_put_echo_skb(skb, netdev, context->echo_index); ++priv->active_tx_contexts; if (priv->active_tx_contexts >= (int)dev->max_tx_urbs) netif_stop_queue(netdev); @@ -553,7 +552,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, dev_kfree_skb(skb); spin_lock_irqsave(&priv->tx_contexts_lock, flags); - can_free_echo_skb(netdev, context->echo_index); context->echo_index = dev->max_tx_urbs; --priv->active_tx_contexts; netif_wake_queue(netdev); @@ -564,6 +562,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, context->priv = priv; + can_put_echo_skb(skb, netdev, context->echo_index); + usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, dev->bulk_out->bEndpointAddress), -- cgit v1.2.3-59-g8ed1b From 207681fc5f3d5d398f106d1ae0080fc2373f707a Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 29 Aug 2018 01:46:54 +0000 Subject: can: ucan: remove set but not used variable 'udev' Fixes gcc '-Wunused-but-set-variable' warning: drivers/net/can/usb/ucan.c: In function 'ucan_disconnect': drivers/net/can/usb/ucan.c:1578:21: warning: variable 'udev' set but not used [-Wunused-but-set-variable] struct usb_device *udev; Signed-off-by: YueHaibing Reviewed-by: Martin Elshuber Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ucan.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index 0678a38b1af4..c9fd83e8d947 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -1575,11 +1575,8 @@ err_firmware_needs_update: /* disconnect the device */ static void ucan_disconnect(struct usb_interface *intf) { - struct usb_device *udev; struct ucan_priv *up = usb_get_intfdata(intf); - udev = interface_to_usbdev(intf); - usb_set_intfdata(intf, NULL); if (up) { -- cgit v1.2.3-59-g8ed1b From ff1f19d56c200b35eb07cfa6668aa6dcac198cec Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 29 Aug 2018 01:25:45 +0000 Subject: can: ucan: remove duplicated include from ucan.c Remove duplicated include. Signed-off-by: YueHaibing Reviewed-by: Martin Elshuber Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/ucan.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c index c9fd83e8d947..f3d5bda012a1 100644 --- a/drivers/net/can/usb/ucan.c +++ b/drivers/net/can/usb/ucan.c @@ -35,10 +35,6 @@ #include #include -#include -#include -#include - #define UCAN_DRIVER_NAME "ucan" #define UCAN_MAX_RX_URBS 8 /* the CAN controller needs a while to enable/disable the bus */ -- cgit v1.2.3-59-g8ed1b From 4f145f14f6b98b5aa0dd91bdae518b3f24f74b37 Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Mon, 20 Aug 2018 16:49:10 +0200 Subject: dt-bindings: can: rcar_can: document r8a77965 support Document the support for rcar_can on R8A77965 SoC devices. Add R8A77965 to the list of SoCs which require the "assigned-clocks" and "assigned-clock-rates" properties (thanks, Sergei). Signed-off-by: Eugeniu Rosca Reviewed-by: Simon Horman Reviewed-by: Kieran Bingham Reviewed-by: Rob Herring Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/rcar_can.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt index cc4372842bf3..47fc68148f38 100644 --- a/Documentation/devicetree/bindings/net/can/rcar_can.txt +++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt @@ -14,6 +14,7 @@ Required properties: "renesas,can-r8a7794" if CAN controller is a part of R8A7794 SoC. "renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC. "renesas,can-r8a7796" if CAN controller is a part of R8A7796 SoC. + "renesas,can-r8a77965" if CAN controller is a part of R8A77965 SoC. "renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device. "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1 compatible device. @@ -29,11 +30,10 @@ Required properties: - pinctrl-0: pin control group to be used for this controller. - pinctrl-names: must be "default". -Required properties for "renesas,can-r8a7795" and "renesas,can-r8a7796" -compatible: -In R8A7795 and R8A7796 SoCs, "clkp2" can be CANFD clock. This is a div6 clock -and can be used by both CAN and CAN FD controller at the same time. It needs to -be scaled to maximum frequency if any of these controllers use it. This is done +Required properties for R8A7795, R8A7796 and R8A77965: +For the denoted SoCs, "clkp2" can be CANFD clock. This is a div6 clock and can +be used by both CAN and CAN FD controller at the same time. It needs to be +scaled to maximum frequency if any of these controllers use it. This is done using the below properties: - assigned-clocks: phandle of clkp2(CANFD) clock. -- cgit v1.2.3-59-g8ed1b From 68c8d209cd4337da4fa04c672f0b62bb735969bc Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Mon, 10 Sep 2018 11:43:13 +0100 Subject: can: rcar_can: Fix erroneous registration Assigning 2 to "renesas,can-clock-select" tricks the driver into registering the CAN interface, even though we don't want that. This patch improves one of the checks to prevent that from happening. Fixes: 862e2b6af9413b43 ("can: rcar_can: support all input clocks") Signed-off-by: Fabrizio Castro Signed-off-by: Chris Paterson Reviewed-by: Simon Horman Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rcar/rcar_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 11662f479e76..771a46083739 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -24,6 +24,9 @@ #define RCAR_CAN_DRV_NAME "rcar_can" +#define RCAR_SUPPORTED_CLOCKS (BIT(CLKR_CLKP1) | BIT(CLKR_CLKP2) | \ + BIT(CLKR_CLKEXT)) + /* Mailbox configuration: * mailbox 60 - 63 - Rx FIFO mailboxes * mailbox 56 - 59 - Tx FIFO mailboxes @@ -789,7 +792,7 @@ static int rcar_can_probe(struct platform_device *pdev) goto fail_clk; } - if (clock_select >= ARRAY_SIZE(clock_names)) { + if (!(BIT(clock_select) & RCAR_SUPPORTED_CLOCKS)) { err = -EINVAL; dev_err(&pdev->dev, "invalid CAN clock selected\n"); goto fail_clk; -- cgit v1.2.3-59-g8ed1b From 868b7c0f43e61f227bf3d7f7d6134bb3c67bb0e8 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Mon, 10 Sep 2018 11:43:14 +0100 Subject: dt-bindings: can: rcar_can: Add r8a774a1 support Document RZ/G2M (r8a774a1) SoC specific bindings. Signed-off-by: Fabrizio Castro Signed-off-by: Chris Paterson Reviewed-by: Biju Das Reviewed-by: Rob Herring Reviewed-by: Simon Horman Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/rcar_can.txt | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt index 47fc68148f38..9936b9ee67c3 100644 --- a/Documentation/devicetree/bindings/net/can/rcar_can.txt +++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt @@ -5,6 +5,7 @@ Required properties: - compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC. "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC. "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC. + "renesas,can-r8a774a1" if CAN controller is a part of R8A774A1 SoC. "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC. "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC. "renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC. @@ -18,15 +19,21 @@ Required properties: "renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device. "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1 compatible device. - "renesas,rcar-gen3-can" for a generic R-Car Gen3 compatible device. + "renesas,rcar-gen3-can" for a generic R-Car Gen3 or RZ/G2 + compatible device. When compatible with the generic version, nodes must list the SoC-specific version corresponding to the platform first followed by the generic version. - reg: physical base address and size of the R-Car CAN register map. - interrupts: interrupt specifier for the sole interrupt. -- clocks: phandles and clock specifiers for 3 CAN clock inputs. -- clock-names: 3 clock input name strings: "clkp1", "clkp2", "can_clk". +- clocks: phandles and clock specifiers for 2 CAN clock inputs for RZ/G2 + devices. + phandles and clock specifiers for 3 CAN clock inputs for every other + SoC. +- clock-names: 2 clock input name strings for RZ/G2: "clkp1", "can_clk". + 3 clock input name strings for every other SoC: "clkp1", "clkp2", + "can_clk". - pinctrl-0: pin control group to be used for this controller. - pinctrl-names: must be "default". @@ -42,8 +49,9 @@ using the below properties: Optional properties: - renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are: <0x0> (default) : Peripheral clock (clkp1) - <0x1> : Peripheral clock (clkp2) - <0x3> : Externally input clock + <0x1> : Peripheral clock (clkp2) (not supported by + RZ/G2 devices) + <0x3> : External input clock Example ------- -- cgit v1.2.3-59-g8ed1b From f164d0204b1156a7e0d8d1622c1a8d25752befec Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 27 Oct 2018 10:36:54 +0200 Subject: can: hi311x: Use level-triggered interrupt If the hi3110 shares the SPI bus with another traffic-intensive device and packets are received in high volume (by a separate machine sending with "cangen -g 0 -i -x"), reception stops after a few minutes and the counter in /proc/interrupts stops incrementing. Bus state is "active". Bringing the interface down and back up reconvenes the reception. The issue is not observed when the hi3110 is the sole device on the SPI bus. Using a level-triggered interrupt makes the issue go away and lets the hi3110 successfully receive 2 GByte over the course of 5 days while a ks8851 Ethernet chip on the same SPI bus handles 6 GByte of traffic. Unfortunately the hi3110 datasheet is mum on the trigger type. The pin description on page 3 only specifies the polarity (active high): http://www.holtic.com/documents/371-hi-3110_v-rev-kpdf.do Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Signed-off-by: Lukas Wunner Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/holt_hi311x.txt | 2 +- drivers/net/can/spi/hi311x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt index 903a78da65be..3a9926f99937 100644 --- a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt +++ b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt @@ -17,7 +17,7 @@ Example: reg = <1>; clocks = <&clk32m>; interrupt-parent = <&gpio4>; - interrupts = <13 IRQ_TYPE_EDGE_RISING>; + interrupts = <13 IRQ_TYPE_LEVEL_HIGH>; vdd-supply = <®5v0>; xceiver-supply = <®5v0>; }; diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 53e320c92a8b..ddaf46239e39 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -760,7 +760,7 @@ static int hi3110_open(struct net_device *net) { struct hi3110_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; - unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_RISING; + unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_HIGH; int ret; ret = open_candev(net); -- cgit v1.2.3-59-g8ed1b From 5178b7cd8e42448b1041716f124734eaaa36ca50 Mon Sep 17 00:00:00 2001 From: Pankaj Bansal Date: Wed, 1 Aug 2018 19:36:46 +0530 Subject: can: flexcan: Unlock the MB unconditionally Unlock the MB irrespective of reception method being FIFO or timestamp based. It is optional but recommended to unlock Mailbox as soon as possible and make it available for reception. Reported-by: Alexander Stein Signed-off-by: Pankaj Bansal Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 8e972ef08637..0431f8d05518 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -720,9 +720,14 @@ static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload, priv->write(BIT(n - 32), ®s->iflag2); } else { priv->write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, ®s->iflag1); - priv->read(®s->timer); } + /* Read the Free Running Timer. It is optional but recommended + * to unlock Mailbox as soon as possible and make it available + * for reception. + */ + priv->read(®s->timer); + return 1; } -- cgit v1.2.3-59-g8ed1b From cbffaf7aa09edbaea2bc7dc440c945297095e2fd Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 11 Oct 2018 17:01:25 +0200 Subject: can: flexcan: Always use last mailbox for TX Essentially this patch moves the TX mailbox to position 63, regardless of timestamp based offloading or RX FIFO. So mainly the iflag register usage regarding TX has changed. The rest is consolidating RX FIFO and timestamp offloading as they now use both the same TX mailbox. The reason is a very annoying behavior regarding sending RTR frames when _not_ using RX FIFO: If a TX mailbox sent a RTR frame it becomes a RX mailbox. For that reason flexcan_irq disables the TX mailbox again. But if during the time the RTR was sent and the TX mailbox is disabled a new CAN frames is received, it is lost without notice. The reason is that so-called "Move-in" process starts from the lowest mailbox which happen to be a TX mailbox set to EMPTY. Steps to reproduce (I used an imx7d): 1. generate regular bursts of messages 2. send a RTR from flexcan with higher priority than burst messages every 1ms, e.g. cangen -I 0x100 -L 0 -g 1 -R can0 3. notice a lost message without notification after some seconds When running an iperf in parallel this problem is occurring even more frequently. Using filters is not possible as at least one single CAN-ID is allowed. Handling the TX MB during RX is also not possible as there is no race-free disable of RX MB. There is still a slight window when the described problem can occur. But for that all RX MB must be in use which is essentially next to an overrun. Still there will be no indication if it ever occurs. Signed-off-by: Alexander Stein Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 67 +++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 0431f8d05518..677c41701cf3 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -135,13 +135,12 @@ /* FLEXCAN interrupt flag register (IFLAG) bits */ /* Errata ERR005829 step7: Reserve first valid MB */ -#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8 -#define FLEXCAN_TX_MB_OFF_FIFO 9 +#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO 8 #define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP 0 -#define FLEXCAN_TX_MB_OFF_TIMESTAMP 1 -#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_OFF_TIMESTAMP + 1) -#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST 63 -#define FLEXCAN_IFLAG_MB(x) BIT(x) +#define FLEXCAN_TX_MB 63 +#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST (FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP + 1) +#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST (FLEXCAN_TX_MB - 1) +#define FLEXCAN_IFLAG_MB(x) BIT(x & 0x1f) #define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW BIT(7) #define FLEXCAN_IFLAG_RX_FIFO_WARN BIT(6) #define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE BIT(5) @@ -737,9 +736,9 @@ static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv) struct flexcan_regs __iomem *regs = priv->regs; u32 iflag1, iflag2; - iflag2 = priv->read(®s->iflag2) & priv->reg_imask2_default; - iflag1 = priv->read(®s->iflag1) & priv->reg_imask1_default & + iflag2 = priv->read(®s->iflag2) & priv->reg_imask2_default & ~FLEXCAN_IFLAG_MB(priv->tx_mb_idx); + iflag1 = priv->read(®s->iflag1) & priv->reg_imask1_default; return (u64)iflag2 << 32 | iflag1; } @@ -751,11 +750,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) struct flexcan_priv *priv = netdev_priv(dev); struct flexcan_regs __iomem *regs = priv->regs; irqreturn_t handled = IRQ_NONE; - u32 reg_iflag1, reg_esr; + u32 reg_iflag2, reg_esr; enum can_state last_state = priv->can.state; - reg_iflag1 = priv->read(®s->iflag1); - /* reception interrupt */ if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { u64 reg_iflag; @@ -769,6 +766,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) break; } } else { + u32 reg_iflag1; + + reg_iflag1 = priv->read(®s->iflag1); if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) { handled = IRQ_HANDLED; can_rx_offload_irq_offload_fifo(&priv->offload); @@ -784,8 +784,10 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) } } + reg_iflag2 = priv->read(®s->iflag2); + /* transmission complete interrupt */ - if (reg_iflag1 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) { + if (reg_iflag2 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) { handled = IRQ_HANDLED; stats->tx_bytes += can_get_echo_skb(dev, 0); stats->tx_packets++; @@ -794,7 +796,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) /* after sending a RTR frame MB is in RX mode */ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE, &priv->tx_mb->can_ctrl); - priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), ®s->iflag1); + priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), ®s->iflag2); netif_wake_queue(dev); } @@ -936,15 +938,13 @@ static int flexcan_chip_start(struct net_device *dev) reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff); reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ | - FLEXCAN_MCR_IDAM_C; + FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); - if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) reg_mcr &= ~FLEXCAN_MCR_FEN; - reg_mcr |= FLEXCAN_MCR_MAXMB(priv->offload.mb_last); - } else { - reg_mcr |= FLEXCAN_MCR_FEN | - FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); - } + else + reg_mcr |= FLEXCAN_MCR_FEN; + netdev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr); priv->write(reg_mcr, ®s->mcr); @@ -987,16 +987,17 @@ static int flexcan_chip_start(struct net_device *dev) priv->write(reg_ctrl2, ®s->ctrl2); } - /* clear and invalidate all mailboxes first */ - for (i = priv->tx_mb_idx; i < ARRAY_SIZE(regs->mb); i++) { - priv->write(FLEXCAN_MB_CODE_RX_INACTIVE, - ®s->mb[i].can_ctrl); - } - if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { - for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) + for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) { priv->write(FLEXCAN_MB_CODE_RX_EMPTY, ®s->mb[i].can_ctrl); + } + } else { + /* clear and invalidate unused mailboxes first */ + for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i <= ARRAY_SIZE(regs->mb); i++) { + priv->write(FLEXCAN_MB_CODE_RX_INACTIVE, + ®s->mb[i].can_ctrl); + } } /* Errata ERR005829: mark first TX mailbox as INACTIVE */ @@ -1360,17 +1361,15 @@ static int flexcan_probe(struct platform_device *pdev) priv->devtype_data = devtype_data; priv->reg_xceiver = reg_xceiver; - if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) { - priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_TIMESTAMP; + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP]; - } else { - priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_FIFO; + else priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO]; - } + priv->tx_mb_idx = FLEXCAN_TX_MB; priv->tx_mb = ®s->mb[priv->tx_mb_idx]; - priv->reg_imask1_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx); - priv->reg_imask2_default = 0; + priv->reg_imask1_default = 0; + priv->reg_imask2_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx); priv->offload.mailbox_read = flexcan_mailbox_read; -- cgit v1.2.3-59-g8ed1b From e05237f9da42ee52e73acea0bb082d788e111229 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 9 Nov 2018 15:01:50 +0100 Subject: can: flexcan: remove not needed struct flexcan_priv::tx_mb and struct flexcan_priv::tx_mb_idx The previous patch changes the TX path to always use the last mailbox regardless of the used offload scheme (rx-fifo or timestamp based). This means members "tx_mb" and "tx_mb_idx" of the struct flexcan_priv don't depend on the offload scheme, so replace them by compile time constants. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 677c41701cf3..68b46395c580 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -258,9 +258,7 @@ struct flexcan_priv { struct can_rx_offload offload; struct flexcan_regs __iomem *regs; - struct flexcan_mb __iomem *tx_mb; struct flexcan_mb __iomem *tx_mb_reserved; - u8 tx_mb_idx; u32 reg_ctrl_default; u32 reg_imask1_default; u32 reg_imask2_default; @@ -514,6 +512,7 @@ static int flexcan_get_berr_counter(const struct net_device *dev, static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev) { const struct flexcan_priv *priv = netdev_priv(dev); + struct flexcan_regs __iomem *regs = priv->regs; struct can_frame *cf = (struct can_frame *)skb->data; u32 can_id; u32 data; @@ -536,17 +535,17 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de if (cf->can_dlc > 0) { data = be32_to_cpup((__be32 *)&cf->data[0]); - priv->write(data, &priv->tx_mb->data[0]); + priv->write(data, ®s->mb[FLEXCAN_TX_MB].data[0]); } if (cf->can_dlc > 4) { data = be32_to_cpup((__be32 *)&cf->data[4]); - priv->write(data, &priv->tx_mb->data[1]); + priv->write(data, ®s->mb[FLEXCAN_TX_MB].data[1]); } can_put_echo_skb(skb, dev, 0); - priv->write(can_id, &priv->tx_mb->can_id); - priv->write(ctrl, &priv->tx_mb->can_ctrl); + priv->write(can_id, ®s->mb[FLEXCAN_TX_MB].can_id); + priv->write(ctrl, ®s->mb[FLEXCAN_TX_MB].can_ctrl); /* Errata ERR005829 step8: * Write twice INACTIVE(0x8) code to first MB. @@ -737,7 +736,7 @@ static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv) u32 iflag1, iflag2; iflag2 = priv->read(®s->iflag2) & priv->reg_imask2_default & - ~FLEXCAN_IFLAG_MB(priv->tx_mb_idx); + ~FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB); iflag1 = priv->read(®s->iflag1) & priv->reg_imask1_default; return (u64)iflag2 << 32 | iflag1; @@ -787,7 +786,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) reg_iflag2 = priv->read(®s->iflag2); /* transmission complete interrupt */ - if (reg_iflag2 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) { + if (reg_iflag2 & FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB)) { handled = IRQ_HANDLED; stats->tx_bytes += can_get_echo_skb(dev, 0); stats->tx_packets++; @@ -795,8 +794,8 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) /* after sending a RTR frame MB is in RX mode */ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE, - &priv->tx_mb->can_ctrl); - priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), ®s->iflag2); + ®s->mb[FLEXCAN_TX_MB].can_ctrl); + priv->write(FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB), ®s->iflag2); netif_wake_queue(dev); } @@ -938,7 +937,7 @@ static int flexcan_chip_start(struct net_device *dev) reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff); reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ | - FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); + FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(FLEXCAN_TX_MB); if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) reg_mcr &= ~FLEXCAN_MCR_FEN; @@ -1006,7 +1005,7 @@ static int flexcan_chip_start(struct net_device *dev) /* mark TX mailbox as INACTIVE */ priv->write(FLEXCAN_MB_CODE_TX_INACTIVE, - &priv->tx_mb->can_ctrl); + ®s->mb[FLEXCAN_TX_MB].can_ctrl); /* acceptance mask/acceptance code (accept everything) */ priv->write(0x0, ®s->rxgmask); @@ -1365,11 +1364,9 @@ static int flexcan_probe(struct platform_device *pdev) priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP]; else priv->tx_mb_reserved = ®s->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO]; - priv->tx_mb_idx = FLEXCAN_TX_MB; - priv->tx_mb = ®s->mb[priv->tx_mb_idx]; priv->reg_imask1_default = 0; - priv->reg_imask2_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx); + priv->reg_imask2_default = FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB); priv->offload.mailbox_read = flexcan_mailbox_read; -- cgit v1.2.3-59-g8ed1b From a4310fa2f24687888ce80fdb0e88583561a23700 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 10:37:46 +0100 Subject: can: dev: can_get_echo_skb(): factor out non sending code to __can_get_echo_skb() This patch factors out all non sending parts of can_get_echo_skb() into a seperate function __can_get_echo_skb(), so that it can be re-used in an upcoming patch. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 36 +++++++++++++++++++++++++----------- include/linux/can/dev.h | 1 + 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 49163570a63a..80530ab37b1e 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -477,14 +477,7 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(can_put_echo_skb); -/* - * Get the skb from the stack and loop it back locally - * - * The function is typically called when the TX done interrupt - * is handled in the device driver. The driver must protect - * access to priv->echo_skb, if necessary. - */ -unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); @@ -495,13 +488,34 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - netif_rx(priv->echo_skb[idx]); + *len_ptr = dlc; priv->echo_skb[idx] = NULL; - return dlc; + return skb; } - return 0; + return NULL; +} + +/* + * Get the skb from the stack and loop it back locally + * + * The function is typically called when the TX done interrupt + * is handled in the device driver. The driver must protect + * access to priv->echo_skb, if necessary. + */ +unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) +{ + struct sk_buff *skb; + u8 len; + + skb = __can_get_echo_skb(dev, idx, &len); + if (!skb) + return 0; + + netif_rx(skb); + + return len; } EXPORT_SYMBOL_GPL(can_get_echo_skb); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index a83e1f632eb7..f01623aef2f7 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -169,6 +169,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf, void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev, unsigned int idx); +struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr); unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx); void can_free_echo_skb(struct net_device *dev, unsigned int idx); -- cgit v1.2.3-59-g8ed1b From 200f5c49f7a2cd694436bfc6cb0662b794c96736 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 11:08:21 +0100 Subject: can: dev: __can_get_echo_skb(): replace struct can_frame by canfd_frame to access frame length This patch replaces the use of "struct can_frame::can_dlc" by "struct canfd_frame::len" to access the frame's length. As it is ensured that both structures have a compatible memory layout for this member this is no functional change. Futher, this compatibility is documented in a comment. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 80530ab37b1e..46cc5fec4043 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -484,11 +484,14 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 BUG_ON(idx >= priv->echo_skb_max); if (priv->echo_skb[idx]) { + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ struct sk_buff *skb = priv->echo_skb[idx]; - struct can_frame *cf = (struct can_frame *)skb->data; - u8 dlc = cf->can_dlc; + struct canfd_frame *cf = (struct canfd_frame *)skb->data; + u8 len = cf->len; - *len_ptr = dlc; + *len_ptr = len; priv->echo_skb[idx] = NULL; return skb; -- cgit v1.2.3-59-g8ed1b From e7a6994d043a1e31d5b17706a22ce33d2a3e4cdc Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 14:05:26 +0100 Subject: can: dev: __can_get_echo_skb(): Don't crash the kernel if can_priv::echo_skb is accessed out of bounds If the "struct can_priv::echo_skb" is accessed out of bounds would lead to a kernel crash. Better print a sensible warning message instead and try to recover. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 46cc5fec4043..c05e4d50d43d 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -481,7 +481,11 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 { struct can_priv *priv = netdev_priv(dev); - BUG_ON(idx >= priv->echo_skb_max); + if (idx >= priv->echo_skb_max) { + netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", + __func__, idx, priv->echo_skb_max); + return NULL; + } if (priv->echo_skb[idx]) { /* Using "struct canfd_frame::len" for the frame -- cgit v1.2.3-59-g8ed1b From 7da11ba5c5066dadc2e96835a6233d56d7b7764a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 31 Oct 2018 14:15:13 +0100 Subject: can: dev: __can_get_echo_skb(): print error message, if trying to echo non existing skb Prior to echoing a successfully transmitted CAN frame (by calling can_get_echo_skb()), CAN drivers have to put the CAN frame (by calling can_put_echo_skb() in the transmit function). These put and get function take an index as parameter, which is used to identify the CAN frame. A driver calling can_get_echo_skb() with a index not pointing to a skb is a BUG, so add an appropriate error message. Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index c05e4d50d43d..3b3f88ffab53 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -480,6 +480,8 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb); struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr) { struct can_priv *priv = netdev_priv(dev); + struct sk_buff *skb = priv->echo_skb[idx]; + struct canfd_frame *cf; if (idx >= priv->echo_skb_max) { netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n", @@ -487,21 +489,20 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 return NULL; } - if (priv->echo_skb[idx]) { - /* Using "struct canfd_frame::len" for the frame - * length is supported on both CAN and CANFD frames. - */ - struct sk_buff *skb = priv->echo_skb[idx]; - struct canfd_frame *cf = (struct canfd_frame *)skb->data; - u8 len = cf->len; - - *len_ptr = len; - priv->echo_skb[idx] = NULL; - - return skb; + if (!skb) { + netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n", + __func__, idx); + return NULL; } - return NULL; + /* Using "struct canfd_frame::len" for the frame + * length is supported on both CAN and CANFD frames. + */ + cf = (struct canfd_frame *)skb->data; + *len_ptr = cf->len; + priv->echo_skb[idx] = NULL; + + return skb; } /* -- cgit v1.2.3-59-g8ed1b From 55059f2b7f868cd43b3ad30e28e18347e1b46ace Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:38 +0200 Subject: can: rx-offload: introduce can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions Current CAN framework can't guarantee proper/chronological order of RX and TX-ECHO messages. To make this possible, drivers should use this functions instead of can_get_echo_skb(). Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rx-offload.c | 46 ++++++++++++++++++++++++++++++++++++++++++ include/linux/can/rx-offload.h | 4 ++++ 2 files changed, 50 insertions(+) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index c7d05027a7a0..c368686e2164 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -211,6 +211,52 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) } EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo); +int can_rx_offload_queue_sorted(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp) +{ + struct can_rx_offload_cb *cb; + unsigned long flags; + + if (skb_queue_len(&offload->skb_queue) > + offload->skb_queue_len_max) + return -ENOMEM; + + cb = can_rx_offload_get_cb(skb); + cb->timestamp = timestamp; + + spin_lock_irqsave(&offload->skb_queue.lock, flags); + __skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare); + spin_unlock_irqrestore(&offload->skb_queue.lock, flags); + + can_rx_offload_schedule(offload); + + return 0; +} +EXPORT_SYMBOL_GPL(can_rx_offload_queue_sorted); + +unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp) +{ + struct net_device *dev = offload->dev; + struct net_device_stats *stats = &dev->stats; + struct sk_buff *skb; + u8 len; + int err; + + skb = __can_get_echo_skb(dev, idx, &len); + if (!skb) + return 0; + + err = can_rx_offload_queue_sorted(offload, skb, timestamp); + if (err) { + stats->rx_errors++; + stats->tx_fifo_errors++; + } + + return len; +} +EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb); + int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index cb31683bbe15..01a7c9e5d8d8 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -41,6 +41,10 @@ int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload * int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight); int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg); int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload); +int can_rx_offload_queue_sorted(struct can_rx_offload *offload, + struct sk_buff *skb, u32 timestamp); +unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, + unsigned int idx, u32 timestamp); int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); -- cgit v1.2.3-59-g8ed1b From ed72bc8bcb9277061e753faf300b20f97323761c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:39 +0200 Subject: can: flexcan: handle tx-complete CAN frames via rx-offload infrastructure Current flexcan driver will put TX-ECHO in regular unsorted way, in this case TX-ECHO can come after the response to the same TXed message. In some cases, for example for J1939 stack, things will break. This patch is using new rx-offload API to put the messages just in the right place. Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 68b46395c580..41a175f80c4b 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -787,8 +787,11 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id) /* transmission complete interrupt */ if (reg_iflag2 & FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB)) { + u32 reg_ctrl = priv->read(®s->mb[FLEXCAN_TX_MB].can_ctrl); + handled = IRQ_HANDLED; - stats->tx_bytes += can_get_echo_skb(dev, 0); + stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload, + 0, reg_ctrl << 16); stats->tx_packets++; can_led_event(dev, CAN_LED_EVENT_TX); -- cgit v1.2.3-59-g8ed1b From 4530ec36bb1e0d24f41c33229694adacda3d5d89 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:40 +0200 Subject: can: rx-offload: rename can_rx_offload_irq_queue_err_skb() to can_rx_offload_queue_tail() This function has nothing todo with error. Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 4 ++-- drivers/net/can/rx-offload.c | 5 +++-- include/linux/can/rx-offload.h | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 41a175f80c4b..5923bd0ec118 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -610,7 +610,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) if (tx_errors) dev->stats.tx_errors++; - can_rx_offload_irq_queue_err_skb(&priv->offload, skb); + can_rx_offload_queue_tail(&priv->offload, skb); } static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) @@ -650,7 +650,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - can_rx_offload_irq_queue_err_skb(&priv->offload, skb); + can_rx_offload_queue_tail(&priv->offload, skb); } static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index c368686e2164..2ce4fa8698c7 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -257,7 +257,8 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, } EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb); -int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb) +int can_rx_offload_queue_tail(struct can_rx_offload *offload, + struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > offload->skb_queue_len_max) @@ -268,7 +269,7 @@ int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_b return 0; } -EXPORT_SYMBOL_GPL(can_rx_offload_irq_queue_err_skb); +EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail); static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight) { diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h index 01a7c9e5d8d8..8268811a697e 100644 --- a/include/linux/can/rx-offload.h +++ b/include/linux/can/rx-offload.h @@ -45,7 +45,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, struct sk_buff *skb, u32 timestamp); unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload, unsigned int idx, u32 timestamp); -int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb); +int can_rx_offload_queue_tail(struct can_rx_offload *offload, + struct sk_buff *skb); void can_rx_offload_reset(struct can_rx_offload *offload); void can_rx_offload_del(struct can_rx_offload *offload); void can_rx_offload_enable(struct can_rx_offload *offload); -- cgit v1.2.3-59-g8ed1b From d788905f68fd4714c82936f6f7f1d3644d7ae7ef Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 18 Sep 2018 11:40:41 +0200 Subject: can: flexcan: use can_rx_offload_queue_sorted() for flexcan_irq_bus_*() Currently, in case of bus error, driver will generate error message and put in the tail of the message queue. To avoid confusions, this change should place the bus related messages in proper order. Signed-off-by: Oleksij Rempel Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 5923bd0ec118..75ce11395ee8 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -561,9 +561,13 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) { struct flexcan_priv *priv = netdev_priv(dev); + struct flexcan_regs __iomem *regs = priv->regs; struct sk_buff *skb; struct can_frame *cf; bool rx_errors = false, tx_errors = false; + u32 timestamp; + + timestamp = priv->read(®s->timer) << 16; skb = alloc_can_err_skb(dev, &cf); if (unlikely(!skb)) @@ -610,17 +614,21 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr) if (tx_errors) dev->stats.tx_errors++; - can_rx_offload_queue_tail(&priv->offload, skb); + can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); } static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) { struct flexcan_priv *priv = netdev_priv(dev); + struct flexcan_regs __iomem *regs = priv->regs; struct sk_buff *skb; struct can_frame *cf; enum can_state new_state, rx_state, tx_state; int flt; struct can_berr_counter bec; + u32 timestamp; + + timestamp = priv->read(®s->timer) << 16; flt = reg_esr & FLEXCAN_ESR_FLT_CONF_MASK; if (likely(flt == FLEXCAN_ESR_FLT_CONF_ACTIVE)) { @@ -650,7 +658,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr) if (unlikely(new_state == CAN_STATE_BUS_OFF)) can_bus_off(dev); - can_rx_offload_queue_tail(&priv->offload, skb); + can_rx_offload_queue_sorted(&priv->offload, skb, timestamp); } static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload) -- cgit v1.2.3-59-g8ed1b From 23d8003907d094f77cf959228e2248d6db819fa7 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Fri, 9 Nov 2018 11:00:12 +0200 Subject: drm/dp_mst: Check if primary mstb is null Unfortunately drm_dp_get_mst_branch_device which is called from both drm_dp_mst_handle_down_rep and drm_dp_mst_handle_up_rep seem to rely on that mgr->mst_primary is not NULL, which seem to be wrong as it can be cleared with simultaneous mode set, if probing fails or in other case. mgr->lock mutex doesn't protect against that as it might just get assigned to NULL right before, not simultaneously. There are currently bugs 107738, 108616 bugs which crash in drm_dp_get_mst_branch_device, caused by this issue. v2: Refactored the code, as it was nicely noticed. Fixed Bugzilla bug numbers(second was 108616, but not 108816) and added links. [changed title and added stable cc] Signed-off-by: Lyude Paul Signed-off-by: Stanislav Lisovskiy Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108616 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107738 Link: https://patchwork.freedesktop.org/patch/msgid/20181109090012.24438-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 5ff1d79b86c4..0e0df398222d 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1275,6 +1275,9 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ mutex_lock(&mgr->lock); mstb = mgr->mst_primary; + if (!mstb) + goto out; + for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; int port_num = (rad[i / 2] >> shift) & 0xf; -- cgit v1.2.3-59-g8ed1b From de7b75d82f70c5469675b99ad632983c50b6f7e7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 Nov 2018 15:58:40 -0700 Subject: floppy: fix race condition in __floppy_read_block_0() LKP recently reported a hang at bootup in the floppy code: [ 245.678853] INFO: task mount:580 blocked for more than 120 seconds. [ 245.679906] Tainted: G T 4.19.0-rc6-00172-ga9f38e1 #1 [ 245.680959] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 245.682181] mount D 6372 580 1 0x00000004 [ 245.683023] Call Trace: [ 245.683425] __schedule+0x2df/0x570 [ 245.683975] schedule+0x2d/0x80 [ 245.684476] schedule_timeout+0x19d/0x330 [ 245.685090] ? wait_for_common+0xa5/0x170 [ 245.685735] wait_for_common+0xac/0x170 [ 245.686339] ? do_sched_yield+0x90/0x90 [ 245.686935] wait_for_completion+0x12/0x20 [ 245.687571] __floppy_read_block_0+0xfb/0x150 [ 245.688244] ? floppy_resume+0x40/0x40 [ 245.688844] floppy_revalidate+0x20f/0x240 [ 245.689486] check_disk_change+0x43/0x60 [ 245.690087] floppy_open+0x1ea/0x360 [ 245.690653] __blkdev_get+0xb4/0x4d0 [ 245.691212] ? blkdev_get+0x1db/0x370 [ 245.691777] blkdev_get+0x1f3/0x370 [ 245.692351] ? path_put+0x15/0x20 [ 245.692871] ? lookup_bdev+0x4b/0x90 [ 245.693539] blkdev_get_by_path+0x3d/0x80 [ 245.694165] mount_bdev+0x2a/0x190 [ 245.694695] squashfs_mount+0x10/0x20 [ 245.695271] ? squashfs_alloc_inode+0x30/0x30 [ 245.695960] mount_fs+0xf/0x90 [ 245.696451] vfs_kern_mount+0x43/0x130 [ 245.697036] do_mount+0x187/0xc40 [ 245.697563] ? memdup_user+0x28/0x50 [ 245.698124] ksys_mount+0x60/0xc0 [ 245.698639] sys_mount+0x19/0x20 [ 245.699167] do_int80_syscall_32+0x61/0x130 [ 245.699813] entry_INT80_32+0xc7/0xc7 showing that we never complete that read request. The reason is that the completion setup is racy - it initializes the completion event AFTER submitting the IO, which means that the IO could complete before/during the init. If it does, we are passing garbage to complete() and we may sleep forever waiting for the event to occur. Fixes: 7b7b68bba5ef ("floppy: bail out in open() if drive is not responding to block0 read") Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a8cfa011c284..fb23578e9a41 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4148,10 +4148,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) bio.bi_end_io = floppy_rb0_cb; bio_set_op_attrs(&bio, REQ_OP_READ, 0); + init_completion(&cbdata.complete); + submit_bio(&bio); process_fd_request(); - init_completion(&cbdata.complete); wait_for_completion(&cbdata.complete); __free_page(page); -- cgit v1.2.3-59-g8ed1b From 3fa58dcab50a0aa16817f16a8d38aee869eb3fb9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 1 Nov 2018 00:30:22 -0700 Subject: acpi, nfit: Fix ARS overflow continuation When the platform BIOS is unable to report all the media error records it requires the OS to restart the scrub at a prescribed location. The driver detects the overflow condition, but then fails to report it to the ARS state machine after reaping the records. Propagate -ENOSPC correctly to continue the ARS operation. Cc: Fixes: 1cf03c00e7c1 ("nfit: scrub and register regions in a workqueue") Reported-by: Jacek Zloch Reviewed-by: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index f8c638f3c946..5970b8f5f768 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2928,9 +2928,9 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc) return rc; if (ars_status_process_records(acpi_desc)) - return -ENOMEM; + dev_err(acpi_desc->dev, "Failed to process ARS records\n"); - return 0; + return rc; } static int ars_register(struct acpi_nfit_desc *acpi_desc, -- cgit v1.2.3-59-g8ed1b From 2121db09630113e67b51ae78c18115f1858f648a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 3 Nov 2018 17:53:09 -0700 Subject: Revert "acpi, nfit: Further restrict userspace ARS start requests" The following lockdep splat results from acquiring the init_mutex in acpi_nfit_clear_to_send(): WARNING: possible circular locking dependency detected lt-daxdev-error/7216 is trying to acquire lock: 00000000f694db15 (&acpi_desc->init_mutex){+.+.}, at: acpi_nfit_clear_to_send+0x27/0x80 [nfit] but task is already holding lock: 00000000182298f2 (&nvdimm_bus->reconfig_mutex){+.+.}, at: __nd_ioctl+0x457/0x610 [libnvdimm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&nvdimm_bus->reconfig_mutex){+.+.}: nvdimm_badblocks_populate+0x41/0x150 [libnvdimm] nd_region_notify+0x95/0xb0 [libnvdimm] nd_device_notify+0x40/0x50 [libnvdimm] ars_complete+0x7f/0xd0 [nfit] acpi_nfit_scrub+0xbb/0x410 [nfit] process_one_work+0x22b/0x5c0 worker_thread+0x3c/0x390 kthread+0x11e/0x140 ret_from_fork+0x3a/0x50 -> #0 (&acpi_desc->init_mutex){+.+.}: __mutex_lock+0x83/0x980 acpi_nfit_clear_to_send+0x27/0x80 [nfit] __nd_ioctl+0x474/0x610 [libnvdimm] nd_ioctl+0xa4/0xb0 [libnvdimm] do_vfs_ioctl+0xa5/0x6e0 ksys_ioctl+0x70/0x80 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x60/0x210 entry_SYSCALL_64_after_hwframe+0x49/0xbe New infrastructure is needed to be able to perform this check without acquiring the lock. Fixes: 594861215c83 ("acpi, nfit: Further restrict userspace ARS start") Cc: Dave Jiang Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 5970b8f5f768..14d9f5bea015 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3341,8 +3341,6 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); - struct nfit_spa *nfit_spa; - int rc = 0; if (nvdimm) return 0; @@ -3355,17 +3353,10 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, * just needs guarantees that any ARS it initiates are not * interrupted by any intervening start requests from userspace. */ - mutex_lock(&acpi_desc->init_mutex); - list_for_each_entry(nfit_spa, &acpi_desc->spas, list) - if (acpi_desc->scrub_spa - || test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state) - || test_bit(ARS_REQ_LONG, &nfit_spa->ars_state)) { - rc = -EBUSY; - break; - } - mutex_unlock(&acpi_desc->init_mutex); + if (work_busy(&acpi_desc->dwork.work)) + return -EBUSY; - return rc; + return 0; } int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, -- cgit v1.2.3-59-g8ed1b From c8b00bb742dd036388f37d019dbb9db177f3e66c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 1 Nov 2018 16:21:05 +1100 Subject: powerpc/mm/64s: Fix preempt warning in slb_allocate_kernel() With preempt enabled we see warnings in do_slb_fault(): BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u33:0/98 futex hash table entries: 4096 (order: 3, 524288 bytes) caller is do_slb_fault+0x204/0x230 CPU: 5 PID: 98 Comm: kworker/u33:0 Not tainted 4.19.0-rc3-gcc-7.3.1-00022-g1936f094e164 #138 Call Trace: dump_stack+0xb4/0x104 (unreliable) check_preemption_disabled+0x148/0x150 do_slb_fault+0x204/0x230 data_access_slb_common+0x138/0x180 This is caused by the get_paca() in slb_allocate_kernel(), which includes a call to debug_smp_processor_id(). slb_allocate_kernel() can only be called from do_slb_fault(), and in that path interrupts are hard disabled and so we can't be preempted, but we can't update the preempt flags (in thread_info) because that could cause an SLB fault. So just use local_paca which is safe and doesn't cause the warning. Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C") Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index b663a36f9ada..bc3914d54e26 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -708,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) return -EFAULT; if (ea < H_VMALLOC_END) - flags = get_paca()->vmalloc_sllp; + flags = local_paca->vmalloc_sllp; else flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; } else { -- cgit v1.2.3-59-g8ed1b From 43c6494fa1499912c8177e71450c0279041152a6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 23:37:58 +1100 Subject: powerpc/io: Fix the IO workarounds code to work with Radix Back in 2006 Ben added some workarounds for a misbehaviour in the Spider IO bridge used on early Cell machines, see commit 014da7ff47b5 ("[POWERPC] Cell "Spider" MMIO workarounds"). Later these were made to be generic, ie. not tied specifically to Spider. The code stashes a token in the high bits (59-48) of virtual addresses used for IO (eg. returned from ioremap()). This works fine when using the Hash MMU, but when we're using the Radix MMU the bits used for the token overlap with some of the bits of the virtual address. This is because the maximum virtual address is larger with Radix, up to c00fffffffffffff, and in fact we use that high part of the address range for ioremap(), see RADIX_KERN_IO_START. As it happens the bits that are used overlap with the bits that differentiate an IO address vs a linear map address. If the resulting address lies outside the linear mapping we will crash (see below), if not we just corrupt memory. virtio-pci 0000:00:00.0: Using 64-bit direct DMA at offset 800000000000000 Unable to handle kernel paging request for data at address 0xc000000080000014 ... CFAR: c000000000626b98 DAR: c000000080000014 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000006c54fc c00000003e523378 c0000000016de600 0000000000000000 GPR04: c00c000080000014 0000000000000007 0fffffff000affff 0000000000000030 ^^^^ ... NIP [c000000000626c5c] .iowrite8+0xec/0x100 LR [c0000000006c992c] .vp_reset+0x2c/0x90 Call Trace: .pci_bus_read_config_dword+0xc4/0x120 (unreliable) .register_virtio_device+0x13c/0x1c0 .virtio_pci_probe+0x148/0x1f0 .local_pci_probe+0x68/0x140 .pci_device_probe+0x164/0x220 .really_probe+0x274/0x3b0 .driver_probe_device+0x80/0x170 .__driver_attach+0x14c/0x150 .bus_for_each_dev+0xb8/0x130 .driver_attach+0x34/0x50 .bus_add_driver+0x178/0x2f0 .driver_register+0x90/0x1a0 .__pci_register_driver+0x6c/0x90 .virtio_pci_driver_init+0x2c/0x40 .do_one_initcall+0x64/0x280 .kernel_init_freeable+0x36c/0x474 .kernel_init+0x24/0x160 .ret_from_kernel_thread+0x58/0x7c This hasn't been a problem because CONFIG_PPC_IO_WORKAROUNDS which enables this code is usually not enabled. It is only enabled when it's selected by PPC_CELL_NATIVE which is only selected by PPC_IBM_CELL_BLADE and that in turn depends on BIG_ENDIAN. So in order to hit the bug you need to build a big endian kernel, with IBM Cell Blade support enabled, as well as Radix MMU support, and then boot that on Power9 using Radix MMU. Still we can fix the bug, so let's do that. We simply use fewer bits for the token, taking the union of the restrictions on the address from both Hash and Radix, we end up with 8 bits we can use for the token. The only user of the token is iowa_mem_find_bus() which only supports 8 token values, so 8 bits is plenty for that. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/io.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 3ef40b703c4a..e746becd9d6f 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -268,19 +268,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * - * This bit field is 12 bits and is at the top of the IO virtual - * addresses PCI_IO_INDIRECT_TOKEN_MASK. + * The highest address in the kernel virtual space are: * - * The kernel virtual space is thus: + * d0003fffffffffff # with Hash MMU + * c00fffffffffffff # with Radix MMU * - * 0xD000000000000000 : vmalloc - * 0xD000080000000000 : PCI PHB IO space - * 0xD000080080000000 : ioremap - * 0xD0000fffffffffff : end of ioremap region - * - * Since the top 4 bits are reserved as the region ID, we use thus - * the next 12 bits and keep 4 bits available for the future if the - * virtual address space is ever to be extended. + * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits + * that can be used for the field. * * The direct IO mapping operations will then mask off those bits * before doing the actual access, though that only happen when @@ -292,8 +286,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, */ #ifdef CONFIG_PPC_INDIRECT_MMIO -#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul -#define PCI_IO_IND_TOKEN_SHIFT 48 +#define PCI_IO_IND_TOKEN_SHIFT 52 +#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT) #define PCI_FIX_ADDR(addr) \ ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK)) #define PCI_GET_ADDR_TOKEN(addr) \ -- cgit v1.2.3-59-g8ed1b From 2c7645b0f7d1014f2636393de7906c6bfd25939f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Nov 2018 13:46:06 +1100 Subject: selftests/powerpc: Fix wild_bctr test to work on ppc64 The selftest I recently added to test branching to an out-of-bounds NIP doesn't work on 64-bit big endian. It does fail but not in the right way. That is it SEGVs trying to load from the opd at BAD_NIP, but it never gets as far as branching to BAD_NIP. To fix it we need to create an opd which is reachable but which holds the bad address. Fixes: b7683fc66eba ("selftests/powerpc: Add a test of wild bctr") Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/mm/wild_bctr.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c index 1b0e9e9a2ddc..90469a9e49d4 100644 --- a/tools/testing/selftests/powerpc/mm/wild_bctr.c +++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c @@ -105,6 +105,20 @@ static void dump_regs(void) } } +#ifdef _CALL_AIXDESC +struct opd { + unsigned long ip; + unsigned long toc; + unsigned long env; +}; +static struct opd bad_opd = { + .ip = BAD_NIP, +}; +#define BAD_FUNC (&bad_opd) +#else +#define BAD_FUNC BAD_NIP +#endif + int test_wild_bctr(void) { int (*func_ptr)(void); @@ -133,7 +147,7 @@ int test_wild_bctr(void) poison_regs(); - func_ptr = (int (*)(void))BAD_NIP; + func_ptr = (int (*)(void))BAD_FUNC; func_ptr(); FAIL_IF(1); /* we didn't segv? */ -- cgit v1.2.3-59-g8ed1b From c469933e772132aad040bd6a2adc8edf9ad6f825 Mon Sep 17 00:00:00 2001 From: Patrick Bellasi Date: Mon, 5 Nov 2018 14:53:58 +0000 Subject: sched/fair: Fix cpu_util_wake() for 'execl' type workloads A ~10% regression has been reported for UnixBench's execl throughput test by Aaron Lu and Ye Xiaolong: https://lkml.org/lkml/2018/10/30/765 That test is pretty simple, it does a "recursive" execve() syscall on the same binary. Starting from the syscall, this sequence is possible: do_execve() do_execveat_common() __do_execve_file() sched_exec() select_task_rq_fair() <==| Task already enqueued find_idlest_cpu() find_idlest_group() capacity_spare_wake() <==| Functions not called from cpu_util_wake() | the wakeup path which means we can end up calling cpu_util_wake() not only from the "wakeup path", as its name would suggest. Indeed, the task doing an execve() syscall is already enqueued on the CPU we want to get the cpu_util_wake() for. The estimated utilization for a CPU computed in cpu_util_wake() was written under the assumption that function can be called only from the wakeup path. If instead the task is already enqueued, we end up with a utilization which does not remove the current task's contribution from the estimated utilization of the CPU. This will wrongly assume a reduced spare capacity on the current CPU and increase the chances to migrate the task on execve. The regression is tracked down to: commit d519329f72a6 ("sched/fair: Update util_est only on util_avg updates") because in that patch we turn on by default the UTIL_EST sched feature. However, the real issue is introduced by: commit f9be3e5961c5 ("sched/fair: Use util_est in LB and WU paths") Let's fix this by ensuring to always discount the task estimated utilization from the CPU's estimated utilization when the task is also the current one. The same benchmark of the bug report, executed on a dual socket 40 CPUs Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz machine, reports these "Execl Throughput" figures (higher the better): mainline : 48136.5 lps mainline+fix : 55376.5 lps which correspond to a 15% speedup. Moreover, since {cpu_util,capacity_spare}_wake() are not really only used from the wakeup path, let's remove this ambiguity by using a better matching name: {cpu_util,capacity_spare}_without(). Since we are at that, let's also improve the existing documentation. Reported-by: Aaron Lu Reported-by: Ye Xiaolong Tested-by: Aaron Lu Signed-off-by: Patrick Bellasi Signed-off-by: Peter Zijlstra (Intel) Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Quentin Perret Cc: Steve Muckle Cc: Suren Baghdasaryan Cc: Thomas Gleixner Cc: Todd Kjos Cc: Vincent Guittot Fixes: f9be3e5961c5 (sched/fair: Use util_est in LB and WU paths) Link: https://lore.kernel.org/lkml/20181025093100.GB13236@e110439-lin/ Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 62 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3648d0300fdf..ac855b2f4774 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5674,11 +5674,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, return target; } -static unsigned long cpu_util_wake(int cpu, struct task_struct *p); +static unsigned long cpu_util_without(int cpu, struct task_struct *p); -static unsigned long capacity_spare_wake(int cpu, struct task_struct *p) +static unsigned long capacity_spare_without(int cpu, struct task_struct *p) { - return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0); + return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0); } /* @@ -5738,7 +5738,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs); - spare_cap = capacity_spare_wake(i, p); + spare_cap = capacity_spare_without(i, p); if (spare_cap > max_spare_cap) max_spare_cap = spare_cap; @@ -5889,8 +5889,8 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p return prev_cpu; /* - * We need task's util for capacity_spare_wake, sync it up to prev_cpu's - * last_update_time. + * We need task's util for capacity_spare_without, sync it up to + * prev_cpu's last_update_time. */ if (!(sd_flag & SD_BALANCE_FORK)) sync_entity_load_avg(&p->se); @@ -6216,10 +6216,19 @@ static inline unsigned long cpu_util(int cpu) } /* - * cpu_util_wake: Compute CPU utilization with any contributions from - * the waking task p removed. + * cpu_util_without: compute cpu utilization without any contributions from *p + * @cpu: the CPU which utilization is requested + * @p: the task which utilization should be discounted + * + * The utilization of a CPU is defined by the utilization of tasks currently + * enqueued on that CPU as well as tasks which are currently sleeping after an + * execution on that CPU. + * + * This method returns the utilization of the specified CPU by discounting the + * utilization of the specified task, whenever the task is currently + * contributing to the CPU utilization. */ -static unsigned long cpu_util_wake(int cpu, struct task_struct *p) +static unsigned long cpu_util_without(int cpu, struct task_struct *p) { struct cfs_rq *cfs_rq; unsigned int util; @@ -6231,7 +6240,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) cfs_rq = &cpu_rq(cpu)->cfs; util = READ_ONCE(cfs_rq->avg.util_avg); - /* Discount task's blocked util from CPU's util */ + /* Discount task's util from CPU's util */ util -= min_t(unsigned int, util, task_util(p)); /* @@ -6240,14 +6249,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) * a) if *p is the only task sleeping on this CPU, then: * cpu_util (== task_util) > util_est (== 0) * and thus we return: - * cpu_util_wake = (cpu_util - task_util) = 0 + * cpu_util_without = (cpu_util - task_util) = 0 * * b) if other tasks are SLEEPING on this CPU, which is now exiting * IDLE, then: * cpu_util >= task_util * cpu_util > util_est (== 0) * and thus we discount *p's blocked utilization to return: - * cpu_util_wake = (cpu_util - task_util) >= 0 + * cpu_util_without = (cpu_util - task_util) >= 0 * * c) if other tasks are RUNNABLE on that CPU and * util_est > cpu_util @@ -6260,8 +6269,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p) * covered by the following code when estimated utilization is * enabled. */ - if (sched_feat(UTIL_EST)) - util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued)); + if (sched_feat(UTIL_EST)) { + unsigned int estimated = + READ_ONCE(cfs_rq->avg.util_est.enqueued); + + /* + * Despite the following checks we still have a small window + * for a possible race, when an execl's select_task_rq_fair() + * races with LB's detach_task(): + * + * detach_task() + * p->on_rq = TASK_ON_RQ_MIGRATING; + * ---------------------------------- A + * deactivate_task() \ + * dequeue_task() + RaceTime + * util_est_dequeue() / + * ---------------------------------- B + * + * The additional check on "current == p" it's required to + * properly fix the execl regression and it helps in further + * reducing the chances for the above race. + */ + if (unlikely(task_on_rq_queued(p) || current == p)) { + estimated -= min_t(unsigned int, estimated, + (_task_util_est(p) | UTIL_AVG_UNCHANGED)); + } + util = max(util, estimated); + } /* * Utilization (estimated) can exceed the CPU capacity, thus let's -- cgit v1.2.3-59-g8ed1b From c10a8de0d32e95b0b8c7c17b6dc09baea5a5a899 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Oct 2018 10:04:18 -0700 Subject: perf/x86/intel/uncore: Add more IMC PCI IDs for KabyLake and CoffeeLake CPUs KabyLake and CoffeeLake CPUs have the same client uncore events as SkyLake. Add the PCI IDs for the KabyLake Y, U, S processor lines and CoffeeLake U, H, S processor lines. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181019170419.378-1-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snb.c | 115 ++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 8527c3e1038b..bfa25814fe5f 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,25 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -569,7 +588,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, - + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -618,6 +712,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ { /* end marker */ } }; -- cgit v1.2.3-59-g8ed1b From 4d47d6407ac7b4b442a4e717488a3bb137398b6c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Oct 2018 10:04:19 -0700 Subject: perf/x86/intel/uncore: Support CoffeeLake 8th CBOX Coffee Lake has 8 core products which has 8 Cboxes. The 8th CBOX is mapped into different MSR space. Increase the num_boxes to 8 to handle the new products. It will not impact the previous platforms, SkyLake, KabyLake and earlier CoffeeLake. Because the num_boxes will be recalculated in uncore_cpu_init and doesn't exceed the x86_max_cores. Introduce a new box flag bit to indicate the 8th CBOX. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181019170419.378-2-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.h | 33 +++++++++++++++++++++++++-------- arch/x86/events/intel/uncore_snb.c | 6 +++++- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index e17ab885b1e9..cb46d602a6b8 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -129,8 +129,15 @@ struct intel_uncore_box { struct intel_uncore_extra_reg shared_regs[0]; }; -#define UNCORE_BOX_FLAG_INITIATED 0 -#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ +/* CFL uncore 8th cbox MSRs */ +#define CFL_UNC_CBO_7_PERFEVTSEL0 0xf70 +#define CFL_UNC_CBO_7_PER_CTR0 0xf76 + +#define UNCORE_BOX_FLAG_INITIATED 0 +/* event config registers are 8-byte apart */ +#define UNCORE_BOX_FLAG_CTL_OFFS8 1 +/* CFL 8th CBOX has different MSR space */ +#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { struct kobj_attribute attr; @@ -297,17 +304,27 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, static inline unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) { - return box->pmu->type->event_ctl + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PERFEVTSEL0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->event_ctl + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) { - return box->pmu->type->perf_ctr + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PER_CTR0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->perf_ctr + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index bfa25814fe5f..2593b0d7aeee 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -221,6 +221,10 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } + + /* The 8th CBOX has different MSR space */ + if (box->pmu->pmu_idx == 7) + __set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags); } static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) @@ -247,7 +251,7 @@ static struct intel_uncore_ops skl_uncore_msr_ops = { static struct intel_uncore_type skl_uncore_cbox = { .name = "cbox", .num_counters = 4, - .num_boxes = 5, + .num_boxes = 8, .perf_ctr_bits = 44, .fixed_ctr_bits = 48, .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, -- cgit v1.2.3-59-g8ed1b From 1e9c75fb9c47a75a9aec0cd17db5f6dc36b58e00 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 3 Oct 2018 10:18:33 -0400 Subject: mnt: fix __detach_mounts infinite loop Since commit ff17fa561a04 ("d_invalidate(): unhash immediately") immediately unhashes the dentry, we'll never return the mountpoint in lookup_mountpoint(), which can lead to an unbreakable loop in d_invalidate(). I have reports of NFS clients getting into this condition after the server removes an export of an existing mount created through follow_automount(), but I suspect there are various other ways to produce this problem if we hunt down users of d_invalidate(). For example, it is possible to get into this state by using XFS' d_invalidate() call in xfs_vn_unlink(): truncate -s 100m img{1,2} mkfs.xfs -q -n version=ci img1 mkfs.xfs -q -n version=ci img2 mkdir -p /mnt/xfs mount img1 /mnt/xfs mkdir /mnt/xfs/sub1 mount img2 /mnt/xfs/sub1 cat > /mnt/xfs/sub1/foo & umount -l /mnt/xfs/sub1 mount img2 /mnt/xfs/sub1 mount --make-private /mnt/xfs mkdir /mnt/xfs/sub2 mount --move /mnt/xfs/sub1 /mnt/xfs/sub2 rmdir /mnt/xfs/sub1 Fix this by moving the check for an unlinked dentry out of the detach_mounts() path. Fixes: ff17fa561a04 ("d_invalidate(): unhash immediately") Cc: stable@vger.kernel.org Reviewed-by: "Eric W. Biederman" Signed-off-by: Benjamin Coddington Signed-off-by: Eric W. Biederman --- fs/namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 74f64294a410..a7f91265ea67 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -695,9 +695,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry) hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { - /* might be worth a WARN_ON() */ - if (d_unlinked(dentry)) - return ERR_PTR(-ENOENT); mp->m_count++; return mp; } @@ -711,6 +708,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry) int ret; if (d_mountpoint(dentry)) { + /* might be worth a WARN_ON() */ + if (d_unlinked(dentry)) + return ERR_PTR(-ENOENT); mountpoint: read_seqlock_excl(&mount_lock); mp = lookup_mountpoint(dentry); -- cgit v1.2.3-59-g8ed1b From f4156f9656feac21f4de712fac94fae964c5d402 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 30 Oct 2018 12:17:10 +0100 Subject: batman-adv: Use explicit tvlv padding for ELP packets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The announcement messages of batman-adv COMPAT_VERSION 15 have the possibility to announce additional information via a dynamic TVLV part. This part is optional for the ELP packets and currently not parsed by the Linux implementation. Still out-of-tree versions are using it to transport things like neighbor hashes to optimize the rebroadcast behavior. Since the ELP broadcast packets are smaller than the minimal ethernet packet, it often has to be padded. This is often done (as specified in RFC894) with octets of zero and thus work perfectly fine with the TVLV part (making it a zero length and thus empty). But not all ethernet compatible hardware seems to follow this advice. To avoid ambiguous situations when parsing the TVLV header, just force the 4 bytes (TVLV length + padding) after the required ELP header to zero. Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Reported-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 9f481cfdf77d..e8090f099eb8 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -352,19 +352,21 @@ out: */ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) { + static const size_t tvlv_padding = sizeof(__be32); struct batadv_elp_packet *elp_packet; unsigned char *elp_buff; u32 random_seqno; size_t size; int res = -ENOMEM; - size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN; + size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding; hard_iface->bat_v.elp_skb = dev_alloc_skb(size); if (!hard_iface->bat_v.elp_skb) goto out; skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN); - elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); + elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, + BATADV_ELP_HLEN + tvlv_padding); elp_packet = (struct batadv_elp_packet *)elp_buff; elp_packet->packet_type = BATADV_ELP; -- cgit v1.2.3-59-g8ed1b From d7d8bbb40a5b1f682ee6589e212934f4c6b8ad60 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 7 Nov 2018 23:09:12 +0100 Subject: batman-adv: Expand merged fragment buffer for full packet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The complete size ("total_size") of the fragmented packet is stored in the fragment header and in the size of the fragment chain. When the fragments are ready for merge, the skbuff's tail of the first fragment is expanded to have enough room after the data pointer for at least total_size. This means that it gets expanded by total_size - first_skb->len. But this is ignoring the fact that after expanding the buffer, the fragment header is pulled by from this buffer. Assuming that the tailroom of the buffer was already 0, the buffer after the data pointer of the skbuff is now only total_size - len(fragment_header) large. When the merge function is then processing the remaining fragments, the code to copy the data over to the merged skbuff will cause an skb_over_panic when it tries to actually put enough data to fill the total_size bytes of the packet. The size of the skb_pull must therefore also be taken into account when the buffer's tailroom is expanded. Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge") Reported-by: Martin Weinelt Co-authored-by: Linus Lüssing Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/fragmentation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 0fddc17106bd..5b71a289d04f 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -275,7 +275,7 @@ batadv_frag_merge_packets(struct hlist_head *chain) kfree(entry); packet = (struct batadv_frag_packet *)skb_out->data; - size = ntohs(packet->total_size); + size = ntohs(packet->total_size) + hdr_size; /* Make room for the rest of the fragments. */ if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) { -- cgit v1.2.3-59-g8ed1b From e0c827aca0730b51f38081aa4e8ecf0912aab55f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 10 Nov 2018 13:16:51 +0200 Subject: drm/omap: Populate DSS children in omapdss driver The DSS DT node contains children that describe the DSS components (DISPC and internal encoders). Each of those components is handled by a platform driver, and thus needs to be backed by a platform device. The corresponding platform devices are created in mach-omap2 code by a call to of_platform_populate(). While this approach has worked so far, it doesn't model the hardware architecture very well, as it creates child devices before the parent is ready to handle them. This would be akin to creating I2C slaves before the I2C master is available. The task can be easily performed in the omapdss driver code instead, simplifying mach-omap2 code. We however can't remove the mach-omap2 code completely as the omap2fb driver still depends on it, but we can move it to the omap2fb-specific section, where it can stay until the omap2fb driver gets removed. This has the added benefit of not allowing DSS components to probe before the DSS itself, which led to runtime PM issues when the DSS probe is deferred. Fixes: 27d624527d99 ("drm/omap: dss: Acquire next dssdev at probe time") Signed-off-by: Laurent Pinchart Acked-by: Tony Lindgren Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-2-laurent.pinchart@ideasonboard.com --- arch/arm/mach-omap2/display.c | 111 ++++++++++++++++++-------------------- drivers/gpu/drm/omapdrm/dss/dss.c | 11 +++- 2 files changed, 63 insertions(+), 59 deletions(-) diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 9500b6e27380..f86b72d1d59e 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -209,11 +209,61 @@ static int __init omapdss_init_fbdev(void) return 0; } -#else -static inline int omapdss_init_fbdev(void) + +static const char * const omapdss_compat_names[] __initconst = { + "ti,omap2-dss", + "ti,omap3-dss", + "ti,omap4-dss", + "ti,omap5-dss", + "ti,dra7-dss", +}; + +static struct device_node * __init omapdss_find_dss_of_node(void) { - return 0; + struct device_node *node; + int i; + + for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { + node = of_find_compatible_node(NULL, NULL, + omapdss_compat_names[i]); + if (node) + return node; + } + + return NULL; } + +static int __init omapdss_init_of(void) +{ + int r; + struct device_node *node; + struct platform_device *pdev; + + /* only create dss helper devices if dss is enabled in the .dts */ + + node = omapdss_find_dss_of_node(); + if (!node) + return 0; + + if (!of_device_is_available(node)) + return 0; + + pdev = of_find_device_by_node(node); + + if (!pdev) { + pr_err("Unable to find DSS platform device\n"); + return -ENODEV; + } + + r = of_platform_populate(node, NULL, NULL, &pdev->dev); + if (r) { + pr_err("Unable to populate DSS submodule devices\n"); + return r; + } + + return omapdss_init_fbdev(); +} +omap_device_initcall(omapdss_init_of); #endif /* CONFIG_FB_OMAP2 */ static void dispc_disable_outputs(void) @@ -361,58 +411,3 @@ int omap_dss_reset(struct omap_hwmod *oh) return r; } - -static const char * const omapdss_compat_names[] __initconst = { - "ti,omap2-dss", - "ti,omap3-dss", - "ti,omap4-dss", - "ti,omap5-dss", - "ti,dra7-dss", -}; - -static struct device_node * __init omapdss_find_dss_of_node(void) -{ - struct device_node *node; - int i; - - for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { - node = of_find_compatible_node(NULL, NULL, - omapdss_compat_names[i]); - if (node) - return node; - } - - return NULL; -} - -static int __init omapdss_init_of(void) -{ - int r; - struct device_node *node; - struct platform_device *pdev; - - /* only create dss helper devices if dss is enabled in the .dts */ - - node = omapdss_find_dss_of_node(); - if (!node) - return 0; - - if (!of_device_is_available(node)) - return 0; - - pdev = of_find_device_by_node(node); - - if (!pdev) { - pr_err("Unable to find DSS platform device\n"); - return -ENODEV; - } - - r = of_platform_populate(node, NULL, NULL, &pdev->dev); - if (r) { - pr_err("Unable to populate DSS submodule devices\n"); - return r; - } - - return omapdss_init_fbdev(); -} -omap_device_initcall(omapdss_init_of); diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c index 1aaf260aa9b8..7553c7fc1c45 100644 --- a/drivers/gpu/drm/omapdrm/dss/dss.c +++ b/drivers/gpu/drm/omapdrm/dss/dss.c @@ -1484,16 +1484,23 @@ static int dss_probe(struct platform_device *pdev) dss); /* Add all the child devices as components. */ + r = of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); + if (r) + goto err_uninit_debugfs; + omapdss_gather_components(&pdev->dev); device_for_each_child(&pdev->dev, &match, dss_add_child_component); r = component_master_add_with_match(&pdev->dev, &dss_component_ops, match); if (r) - goto err_uninit_debugfs; + goto err_of_depopulate; return 0; +err_of_depopulate: + of_platform_depopulate(&pdev->dev); + err_uninit_debugfs: dss_debugfs_remove_file(dss->debugfs.clk); dss_debugfs_remove_file(dss->debugfs.dss); @@ -1522,6 +1529,8 @@ static int dss_remove(struct platform_device *pdev) { struct dss_device *dss = platform_get_drvdata(pdev); + of_platform_depopulate(&pdev->dev); + component_master_del(&pdev->dev, &dss_component_ops); dss_debugfs_remove_file(dss->debugfs.clk); -- cgit v1.2.3-59-g8ed1b From f8523b64d2d2f94bb429c15166d7601d39243c4d Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 10 Nov 2018 13:16:52 +0200 Subject: drm/omap: hdmi4: Ensure the device is active during bind The bind function performs hardware access (in hdmi4_cec_init()) and thus requires the device to be active. Ensure this by surrounding the bind function by hdmi_runtime_get() and hdmi_runtime_put() calls. Fixes: 27d624527d99 ("drm/omap: dss: Acquire next dssdev at probe time") Signed-off-by: Laurent Pinchart Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-3-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/hdmi4.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index cf6230eac31a..073fa462930a 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -635,10 +635,14 @@ static int hdmi4_bind(struct device *dev, struct device *master, void *data) hdmi->dss = dss; - r = hdmi_pll_init(dss, hdmi->pdev, &hdmi->pll, &hdmi->wp); + r = hdmi_runtime_get(hdmi); if (r) return r; + r = hdmi_pll_init(dss, hdmi->pdev, &hdmi->pll, &hdmi->wp); + if (r) + goto err_runtime_put; + r = hdmi4_cec_init(hdmi->pdev, &hdmi->core, &hdmi->wp); if (r) goto err_pll_uninit; @@ -652,12 +656,16 @@ static int hdmi4_bind(struct device *dev, struct device *master, void *data) hdmi->debugfs = dss_debugfs_create_file(dss, "hdmi", hdmi_dump_regs, hdmi); + hdmi_runtime_put(hdmi); + return 0; err_cec_uninit: hdmi4_cec_uninit(&hdmi->core); err_pll_uninit: hdmi_pll_uninit(&hdmi->pll); +err_runtime_put: + hdmi_runtime_put(hdmi); return r; } -- cgit v1.2.3-59-g8ed1b From 350c03e880038bf60184500bab9025d3361c0b0e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 10 Nov 2018 13:16:53 +0200 Subject: drm/omap: dsi: Ensure the device is active during probe The probe function performs hardware access to read the number of supported data lanes from a configuration register and thus requires the device to be active. Ensure this by surrounding the access with dsi_runtime_get() and dsi_runtime_put() calls. Fixes: edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe") Signed-off-by: Laurent Pinchart Acked-by: Tony Lindgren Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-4-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 394c129cfb3b..b9d5ad7e67d8 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5409,11 +5409,14 @@ static int dsi_probe(struct platform_device *pdev) /* DSI on OMAP3 doesn't have register DSI_GNQ, set number * of data to 3 by default */ - if (dsi->data->quirks & DSI_QUIRK_GNQ) + if (dsi->data->quirks & DSI_QUIRK_GNQ) { + dsi_runtime_get(dsi); /* NB_DATA_LANES */ dsi->num_lanes_supported = 1 + REG_GET(dsi, DSI_GNQ, 11, 9); - else + dsi_runtime_put(dsi); + } else { dsi->num_lanes_supported = 3; + } r = dsi_init_output(dsi); if (r) -- cgit v1.2.3-59-g8ed1b From 24ec84e854c68ceda59a26027114eb7f260f9411 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 10 Nov 2018 13:16:54 +0200 Subject: drm/omap: Move DISPC runtime PM handling to omapdrm The internal encoders (DSI, HDMI4, HDMI5 and VENC) runtime PM handlers attempt to manage the runtime PM state of the connected DISPC, based on the rationale that the DISPC providing data to the encoders requires ensuring that the display is active whenever the encoders are active. While the DISPC provides data to the encoders, it doesn't as such constitute a resource that encoders require in order to be taken out of suspend, contrary to for instance a functional clock or a power supply. Encoders registers can be accessed without the DISPC being active, and while the encoders will not output any video stream without being fed by the DISPC, the DISPC PM state doesn't influence the encoders PM state. For this reason the DISPC PM state is better managed from the omapdrm driver, in the CRTC enable and disable operations. This allows the encoders PM state to be handled separately from the DISPC, and in particular at times when the DISPC may not be available (for instance at probe due to the DSS probe being deferred, or at remove time du to the DISPC being already removed). Fixes: edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe") Signed-off-by: Laurent Pinchart Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-5-laurent.pinchart@ideasonboard.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 7 ------- drivers/gpu/drm/omapdrm/dss/hdmi4.c | 27 --------------------------- drivers/gpu/drm/omapdrm/dss/hdmi5.c | 27 --------------------------- drivers/gpu/drm/omapdrm/dss/venc.c | 7 ------- drivers/gpu/drm/omapdrm/omap_crtc.c | 6 ++++++ 5 files changed, 6 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index b9d5ad7e67d8..36123c086d97 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5473,19 +5473,12 @@ static int dsi_runtime_suspend(struct device *dev) /* wait for current handler to finish before turning the DSI off */ synchronize_irq(dsi->irq); - dispc_runtime_put(dsi->dss->dispc); - return 0; } static int dsi_runtime_resume(struct device *dev) { struct dsi_data *dsi = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(dsi->dss->dispc); - if (r) - return r; dsi->is_enabled = true; /* ensure the irq handler sees the is_enabled value */ diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index 073fa462930a..aabdda394c9c 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -841,32 +841,6 @@ static int hdmi4_remove(struct platform_device *pdev) return 0; } -static int hdmi_runtime_suspend(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - - dispc_runtime_put(hdmi->dss->dispc); - - return 0; -} - -static int hdmi_runtime_resume(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(hdmi->dss->dispc); - if (r < 0) - return r; - - return 0; -} - -static const struct dev_pm_ops hdmi_pm_ops = { - .runtime_suspend = hdmi_runtime_suspend, - .runtime_resume = hdmi_runtime_resume, -}; - static const struct of_device_id hdmi_of_match[] = { { .compatible = "ti,omap4-hdmi", }, {}, @@ -877,7 +851,6 @@ struct platform_driver omapdss_hdmi4hw_driver = { .remove = hdmi4_remove, .driver = { .name = "omapdss_hdmi", - .pm = &hdmi_pm_ops, .of_match_table = hdmi_of_match, .suppress_bind_attrs = true, }, diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index b0e4a7463f8c..9e8556f67a29 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -825,32 +825,6 @@ static int hdmi5_remove(struct platform_device *pdev) return 0; } -static int hdmi_runtime_suspend(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - - dispc_runtime_put(hdmi->dss->dispc); - - return 0; -} - -static int hdmi_runtime_resume(struct device *dev) -{ - struct omap_hdmi *hdmi = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(hdmi->dss->dispc); - if (r < 0) - return r; - - return 0; -} - -static const struct dev_pm_ops hdmi_pm_ops = { - .runtime_suspend = hdmi_runtime_suspend, - .runtime_resume = hdmi_runtime_resume, -}; - static const struct of_device_id hdmi_of_match[] = { { .compatible = "ti,omap5-hdmi", }, { .compatible = "ti,dra7-hdmi", }, @@ -862,7 +836,6 @@ struct platform_driver omapdss_hdmi5hw_driver = { .remove = hdmi5_remove, .driver = { .name = "omapdss_hdmi5", - .pm = &hdmi_pm_ops, .of_match_table = hdmi_of_match, .suppress_bind_attrs = true, }, diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c index ff0b18c8e4ac..b5f52727f8b1 100644 --- a/drivers/gpu/drm/omapdrm/dss/venc.c +++ b/drivers/gpu/drm/omapdrm/dss/venc.c @@ -946,19 +946,12 @@ static int venc_runtime_suspend(struct device *dev) if (venc->tv_dac_clk) clk_disable_unprepare(venc->tv_dac_clk); - dispc_runtime_put(venc->dss->dispc); - return 0; } static int venc_runtime_resume(struct device *dev) { struct venc_device *venc = dev_get_drvdata(dev); - int r; - - r = dispc_runtime_get(venc->dss->dispc); - if (r < 0) - return r; if (venc->tv_dac_clk) clk_prepare_enable(venc->tv_dac_clk); diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c index 62928ec0e7db..caffc547ef97 100644 --- a/drivers/gpu/drm/omapdrm/omap_crtc.c +++ b/drivers/gpu/drm/omapdrm/omap_crtc.c @@ -350,11 +350,14 @@ static void omap_crtc_arm_event(struct drm_crtc *crtc) static void omap_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct omap_drm_private *priv = crtc->dev->dev_private; struct omap_crtc *omap_crtc = to_omap_crtc(crtc); int ret; DBG("%s", omap_crtc->name); + priv->dispc_ops->runtime_get(priv->dispc); + spin_lock_irq(&crtc->dev->event_lock); drm_crtc_vblank_on(crtc); ret = drm_crtc_vblank_get(crtc); @@ -367,6 +370,7 @@ static void omap_crtc_atomic_enable(struct drm_crtc *crtc, static void omap_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct omap_drm_private *priv = crtc->dev->dev_private; struct omap_crtc *omap_crtc = to_omap_crtc(crtc); DBG("%s", omap_crtc->name); @@ -379,6 +383,8 @@ static void omap_crtc_atomic_disable(struct drm_crtc *crtc, spin_unlock_irq(&crtc->dev->event_lock); drm_crtc_vblank_off(crtc); + + priv->dispc_ops->runtime_put(priv->dispc); } static enum drm_mode_status omap_crtc_mode_valid(struct drm_crtc *crtc, -- cgit v1.2.3-59-g8ed1b From cbed7545db7ae5907d7dc9d4002717d46cae29e9 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Tue, 6 Nov 2018 07:28:02 -0800 Subject: drm/omap: dsi: Fix missing of_platform_depopulate() We're missing a call to of_platform_depopulate() on errors for dsi. Looks like dss is already doing this. Signed-off-by: Tony Lindgren Reviewed-by: Laurent Pinchart Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20181106152802.38599-1-tony@atomide.com --- drivers/gpu/drm/omapdrm/dss/dsi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 36123c086d97..0a485c5b982e 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -5429,15 +5429,19 @@ static int dsi_probe(struct platform_device *pdev) } r = of_platform_populate(dev->of_node, NULL, NULL, dev); - if (r) + if (r) { DSSERR("Failed to populate DSI child devices: %d\n", r); + goto err_uninit_output; + } r = component_add(&pdev->dev, &dsi_component_ops); if (r) - goto err_uninit_output; + goto err_of_depopulate; return 0; +err_of_depopulate: + of_platform_depopulate(dev); err_uninit_output: dsi_uninit_output(dsi); err_pm_disable: -- cgit v1.2.3-59-g8ed1b From 899a42f836678a595f7d2bc36a5a0c2b03d08cbc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 11:42:36 +0100 Subject: ARM: make lookup_processor_type() non-__init Move lookup_processor_type() out of the __init section so it is callable from (eg) the secondary startup code during hotplug. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/kernel/head-common.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 6e0375e7db05..997b02302c31 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -145,6 +145,9 @@ __mmap_switched_data: #endif .size __mmap_switched_data, . - __mmap_switched_data + __FINIT + .text + /* * This provides a C-API version of __lookup_processor_type */ @@ -156,9 +159,6 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type) - __FINIT - .text - /* * Read processor ID register (CP#15, CR0), and look up in the linker-built * supported processor list. Note that we can't use the absolute addresses -- cgit v1.2.3-59-g8ed1b From 65987a8553061515b5851b472081aedb9837a391 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 11:59:56 +0100 Subject: ARM: split out processor lookup Split out the lookup of the processor type and associated error handling from the rest of setup_processor() - we will need to use this in the secondary CPU bringup path for big.Little Spectre variant 2 mitigation. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/cputype.h | 1 + arch/arm/kernel/setup.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 26021980504d..f6df4bb4e543 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -107,6 +107,7 @@ #define ARM_CPU_PART_SCORPION 0x510002d0 extern unsigned int processor_id; +struct proc_info_list *lookup_processor(u32 midr); #ifdef CONFIG_CPU_CP15 #define read_cpuid(reg) \ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index fc40a2b40595..05a4eb6b0d01 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -667,22 +667,29 @@ static void __init smp_build_mpidr_hash(void) } #endif -static void __init setup_processor(void) +/* + * locate processor in the list of supported processor types. The linker + * builds this table for us from the entries in arch/arm/mm/proc-*.S + */ +struct proc_info_list *lookup_processor(u32 midr) { - struct proc_info_list *list; + struct proc_info_list *list = lookup_processor_type(midr); - /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); if (!list) { - pr_err("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); + pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n", + smp_processor_id(), midr); + while (1) + /* can't use cpu_relax() here as it may require MMU setup */; } + return list; +} + +static void __init setup_processor(void) +{ + unsigned int midr = read_cpuid_id(); + struct proc_info_list *list = lookup_processor(midr); + cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); @@ -700,7 +707,7 @@ static void __init setup_processor(void) #endif pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + list->cpu_name, midr, midr & 15, proc_arch[cpu_architecture()], get_cr()); snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c", -- cgit v1.2.3-59-g8ed1b From 945aceb1db8885d3a35790cf2e810f681db52756 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 12:43:03 +0100 Subject: ARM: clean up per-processor check_bugs method call Call the per-processor type check_bugs() method in the same way as we do other per-processor functions - move the "processor." detail into proc-fns.h. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/proc-fns.h | 1 + arch/arm/kernel/bugs.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index e25f4392e1b2..30c499146320 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -99,6 +99,7 @@ extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else #define cpu_proc_init processor._proc_init +#define cpu_check_bugs processor.check_bugs #define cpu_proc_fin processor._proc_fin #define cpu_reset processor.reset #define cpu_do_idle processor._do_idle diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 7be511310191..d41d3598e5e5 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -6,8 +6,8 @@ void check_other_bugs(void) { #ifdef MULTI_CPU - if (processor.check_bugs) - processor.check_bugs(); + if (cpu_check_bugs) + cpu_check_bugs(); #endif } -- cgit v1.2.3-59-g8ed1b From e209950fdd065d2cc46e6338e47e52841b830cba Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 12:17:38 +0100 Subject: ARM: add PROC_VTABLE and PROC_TABLE macros Allow the way we access members of the processor vtable to be changed at compile time. We will need to move to per-CPU vtables to fix the Spectre variant 2 issues on big.Little systems. However, we have a couple of calls that do not need the vtable treatment, and indeed cause a kernel warning due to the (later) use of smp_processor_id(), so also introduce the PROC_TABLE macro for these which always use CPU 0's function pointers. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/proc-fns.h | 39 ++++++++++++++++++++++++++------------- arch/arm/kernel/setup.c | 4 +--- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 30c499146320..c259cc49c641 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -23,7 +23,7 @@ struct mm_struct; /* * Don't change this structure - ASM code relies on it. */ -extern struct processor { +struct processor { /* MISC * get data abort address/flags */ @@ -79,9 +79,13 @@ extern struct processor { unsigned int suspend_size; void (*do_suspend)(void *); void (*do_resume)(void *); -} processor; +}; #ifndef MULTI_CPU +static inline void init_proc_vtable(const struct processor *p) +{ +} + extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); @@ -98,18 +102,27 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn)); extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else -#define cpu_proc_init processor._proc_init -#define cpu_check_bugs processor.check_bugs -#define cpu_proc_fin processor._proc_fin -#define cpu_reset processor.reset -#define cpu_do_idle processor._do_idle -#define cpu_dcache_clean_area processor.dcache_clean_area -#define cpu_set_pte_ext processor.set_pte_ext -#define cpu_do_switch_mm processor.switch_mm -/* These three are private to arch/arm/kernel/suspend.c */ -#define cpu_do_suspend processor.do_suspend -#define cpu_do_resume processor.do_resume +extern struct processor processor; +#define PROC_VTABLE(f) processor.f +#define PROC_TABLE(f) processor.f +static inline void init_proc_vtable(const struct processor *p) +{ + processor = *p; +} + +#define cpu_proc_init PROC_VTABLE(_proc_init) +#define cpu_check_bugs PROC_VTABLE(check_bugs) +#define cpu_proc_fin PROC_VTABLE(_proc_fin) +#define cpu_reset PROC_VTABLE(reset) +#define cpu_do_idle PROC_VTABLE(_do_idle) +#define cpu_dcache_clean_area PROC_TABLE(dcache_clean_area) +#define cpu_set_pte_ext PROC_TABLE(set_pte_ext) +#define cpu_do_switch_mm PROC_VTABLE(switch_mm) + +/* These two are private to arch/arm/kernel/suspend.c */ +#define cpu_do_suspend PROC_VTABLE(do_suspend) +#define cpu_do_resume PROC_VTABLE(do_resume) #endif extern void cpu_resume(void); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 05a4eb6b0d01..c214bd14a1fe 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -693,9 +693,7 @@ static void __init setup_processor(void) cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); -#ifdef MULTI_CPU - processor = *list->proc; -#endif + init_proc_vtable(list->proc); #ifdef MULTI_TLB cpu_tlb = *list->tlb; #endif -- cgit v1.2.3-59-g8ed1b From 5df7a99bdd0de4a0480320264c44c04543c29d5a Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 8 Nov 2018 17:25:28 +0100 Subject: ARM: 8810/1: vfp: Fix wrong assignement to ufp_exc In vfp_preserve_user_clear_hwstate, ufp_exc->fpinst2 gets assigned to itself. It should actually be hwstate->fpinst2 that gets assigned to the ufp_exc field. Fixes commit 3aa2df6ec2ca6bc143a65351cca4266d03a8bc41 ("ARM: 8791/1: vfp: use __copy_to_user() when saving VFP state"). Reported-by: David Binderman Signed-off-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/vfp/vfpmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 3b75f1d8a491..15bc3cf2a7fd 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -579,7 +579,7 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp, */ ufp_exc->fpexc = hwstate->fpexc; ufp_exc->fpinst = hwstate->fpinst; - ufp_exc->fpinst2 = ufp_exc->fpinst2; + ufp_exc->fpinst2 = hwstate->fpinst2; /* Ensure that VFP is disabled. */ vfp_flush_hwstate(thread); -- cgit v1.2.3-59-g8ed1b From 383fb3ee8024d596f488d2dbaf45e572897acbdb Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 12:21:31 +0100 Subject: ARM: spectre-v2: per-CPU vtables to work around big.Little systems In big.Little systems, some CPUs require the Spectre workarounds in paths such as the context switch, but other CPUs do not. In order to handle these differences, we need per-CPU vtables. We are unable to use the kernel's per-CPU variables to support this as per-CPU is not initialised at times when we need access to the vtables, so we have to use an array indexed by logical CPU number. We use an array-of-pointers to avoid having function pointers in the kernel's read/write .data section. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/proc-fns.h | 23 +++++++++++++++++++++++ arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 31 +++++++++++++++++++++++++++++++ arch/arm/mm/proc-v7-bugs.c | 17 ++--------------- 4 files changed, 61 insertions(+), 15 deletions(-) diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index c259cc49c641..e1b6f280ab08 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -104,12 +104,35 @@ extern void cpu_do_resume(void *); #else extern struct processor processor; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#include +/* + * This can't be a per-cpu variable because we need to access it before + * per-cpu has been initialised. We have a couple of functions that are + * called in a pre-emptible context, and so can't use smp_processor_id() + * there, hence PROC_TABLE(). We insist in init_proc_vtable() that the + * function pointers for these are identical across all CPUs. + */ +extern struct processor *cpu_vtable[]; +#define PROC_VTABLE(f) cpu_vtable[smp_processor_id()]->f +#define PROC_TABLE(f) cpu_vtable[0]->f +static inline void init_proc_vtable(const struct processor *p) +{ + unsigned int cpu = smp_processor_id(); + *cpu_vtable[cpu] = *p; + WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area != + cpu_vtable[0]->dcache_clean_area); + WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext != + cpu_vtable[0]->set_pte_ext); +} +#else #define PROC_VTABLE(f) processor.f #define PROC_TABLE(f) processor.f static inline void init_proc_vtable(const struct processor *p) { processor = *p; } +#endif #define cpu_proc_init PROC_VTABLE(_proc_init) #define cpu_check_bugs PROC_VTABLE(check_bugs) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c214bd14a1fe..cd46a595422c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2); #ifdef MULTI_CPU struct processor processor __ro_after_init; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +struct processor *cpu_vtable[NR_CPUS] = { + [0] = &processor, +}; +#endif #endif #ifdef MULTI_TLB struct cpu_tlb_fns cpu_tlb __ro_after_init; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5ad0b67b9e33..82b879db32ee 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd) #endif } +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +static int secondary_biglittle_prepare(unsigned int cpu) +{ + if (!cpu_vtable[cpu]) + cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL); + + return cpu_vtable[cpu] ? 0 : -ENOMEM; +} + +static void secondary_biglittle_init(void) +{ + init_proc_vtable(lookup_processor(read_cpuid_id())->proc); +} +#else +static int secondary_biglittle_prepare(unsigned int cpu) +{ + return 0; +} + +static void secondary_biglittle_init(void) +{ +} +#endif + int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!smp_ops.smp_boot_secondary) return -ENOSYS; + ret = secondary_biglittle_prepare(cpu); + if (ret) + return ret; + /* * We need to tell the secondary core where to find * its stack and the page tables. @@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu; + secondary_biglittle_init(); + /* * The identity mapping is uncached (strongly ordered), so * switch away from it before attempting any exclusive accesses. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 5544b82a2e7a..9a07916af8dd 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - if (processor.switch_mm != cpu_v7_bpiall_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_bpiall; spectre_v2_method = "BPIALL"; @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - if (processor.switch_mm != cpu_v7_iciallu_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; - processor.switch_mm = cpu_v7_hvc_switch_mm; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break; @@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; - processor.switch_mm = cpu_v7_smc_switch_mm; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; spectre_v2_method = "firmware"; break; @@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void) if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); - return; - -bl_error: - pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n", - cpu); } #else static void cpu_v7_spectre_init(void) -- cgit v1.2.3-59-g8ed1b From fb5bbae9b1333d44023713946fdd28db0cd85751 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 5 Nov 2018 09:43:05 +0000 Subject: drm/i915/ringbuffer: Delay after EMIT_INVALIDATE for gen4/gen5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercising the gpu reloc path strenuously revealed an issue where the updated relocations (from MI_STORE_DWORD_IMM) were not being observed upon execution. After some experiments with adding pipecontrols (a lot of pipecontrols (32) as gen4/5 do not have a bit to wait on earlier pipe controls or even the current on), it was discovered that we merely needed to delay the EMIT_INVALIDATE by several flushes. It is important to note that it is the EMIT_INVALIDATE as opposed to the EMIT_FLUSH that needs the delay as opposed to what one might first expect -- that the delay is required for the TLB invalidation to take effect (one presumes to purge any CS buffers) as opposed to a delay after flushing to ensure the writes have landed before triggering invalidation. Testcase: igt/gem_tiled_fence_blits Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181105094305.5767-1-chris@chris-wilson.co.uk (cherry picked from commit 55f99bf2a9c331838c981694bc872cd1ec4070b2) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_ringbuffer.c | 38 +++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d0ef50bf930a..187bb0ceb4ac 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -91,6 +91,7 @@ static int gen4_render_ring_flush(struct i915_request *rq, u32 mode) { u32 cmd, *cs; + int i; /* * read/write caches: @@ -127,12 +128,45 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode) cmd |= MI_INVALIDATE_ISP; } - cs = intel_ring_begin(rq, 2); + i = 2; + if (mode & EMIT_INVALIDATE) + i += 20; + + cs = intel_ring_begin(rq, i); if (IS_ERR(cs)) return PTR_ERR(cs); *cs++ = cmd; - *cs++ = MI_NOOP; + + /* + * A random delay to let the CS invalidate take effect? Without this + * delay, the GPU relocation path fails as the CS does not see + * the updated contents. Just as important, if we apply the flushes + * to the EMIT_FLUSH branch (i.e. immediately after the relocation + * write and before the invalidate on the next batch), the relocations + * still fail. This implies that is a delay following invalidation + * that is required to reset the caches as opposed to a delay to + * ensure the memory is written. + */ + if (mode & EMIT_INVALIDATE) { + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = i915_ggtt_offset(rq->engine->scratch) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + + for (i = 0; i < 12; i++) + *cs++ = MI_FLUSH; + + *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE; + *cs++ = i915_ggtt_offset(rq->engine->scratch) | + PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + } + + *cs++ = cmd; + intel_ring_advance(rq, cs); return 0; -- cgit v1.2.3-59-g8ed1b From 7c4512300cfa5a4dcc8c1c52ae61e3fa4bd11a39 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 6 Nov 2018 16:30:12 -0500 Subject: drm/i915: Fix possible race in intel_dp_add_mst_connector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This hasn't caused any issues yet that I'm aware of, but as Ville Syrjälä pointed out - we need to make sure that intel_connector->mst_port is set before initializing MST connectors, since in theory we could potentially check intel_connector->mst_port in i915_hpd_poll_init_work() after registering the connector but before having written it's value. Signed-off-by: Lyude Paul Reviewed-by: Ville Syrjälä Cc: Rodrigo Vivi Cc: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-2-lyude@redhat.com (cherry picked from commit 66a5ab1034be801630816d1fa6cfc30db1a2f0b0) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_dp_mst.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 1b00f8ea145b..a911691dbd0f 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -452,6 +452,10 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo if (!intel_connector) return NULL; + intel_connector->get_hw_state = intel_dp_mst_get_hw_state; + intel_connector->mst_port = intel_dp; + intel_connector->port = port; + connector = &intel_connector->base; ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); @@ -462,10 +466,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs); - intel_connector->get_hw_state = intel_dp_mst_get_hw_state; - intel_connector->mst_port = intel_dp; - intel_connector->port = port; - for_each_pipe(dev_priv, pipe) { struct drm_encoder *enc = &intel_dp->mst_encoders[pipe]->base.base; -- cgit v1.2.3-59-g8ed1b From 541ff7e96c13cd5d67f6021d233f8e1c3df49278 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Tue, 6 Nov 2018 16:30:13 -0500 Subject: drm/i915: Fix NULL deref when re-enabling HPD IRQs on systems with MST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out that if you trigger an HPD storm on a system that has an MST topology connected to it, you'll end up causing the kernel to eventually hit a NULL deref: [ 332.339041] BUG: unable to handle kernel NULL pointer dereference at 00000000000000ec [ 332.340906] PGD 0 P4D 0 [ 332.342750] Oops: 0000 [#1] SMP PTI [ 332.344579] CPU: 2 PID: 25 Comm: kworker/2:0 Kdump: loaded Tainted: G O 4.18.0-rc3short-hpd-storm+ #2 [ 332.346453] Hardware name: LENOVO 20BWS1KY00/20BWS1KY00, BIOS JBET71WW (1.35 ) 09/14/2018 [ 332.348361] Workqueue: events intel_hpd_irq_storm_reenable_work [i915] [ 332.350301] RIP: 0010:intel_hpd_irq_storm_reenable_work.cold.3+0x2f/0x86 [i915] [ 332.352213] Code: 00 00 ba e8 00 00 00 48 c7 c6 c0 aa 5f a0 48 c7 c7 d0 73 62 a0 4c 89 c1 4c 89 04 24 e8 7f f5 af e0 4c 8b 04 24 44 89 f8 29 e8 <41> 39 80 ec 00 00 00 0f 85 43 13 fc ff 41 0f b6 86 b8 04 00 00 41 [ 332.354286] RSP: 0018:ffffc90000147e48 EFLAGS: 00010006 [ 332.356344] RAX: 0000000000000005 RBX: ffff8802c226c9d4 RCX: 0000000000000006 [ 332.358404] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff88032dc95570 [ 332.360466] RBP: 0000000000000005 R08: 0000000000000000 R09: ffff88031b3dc840 [ 332.362528] R10: 0000000000000000 R11: 000000031a069602 R12: ffff8802c226ca20 [ 332.364575] R13: ffff8802c2268000 R14: ffff880310661000 R15: 000000000000000a [ 332.366615] FS: 0000000000000000(0000) GS:ffff88032dc80000(0000) knlGS:0000000000000000 [ 332.368658] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 332.370690] CR2: 00000000000000ec CR3: 000000000200a003 CR4: 00000000003606e0 [ 332.372724] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 332.374773] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 332.376798] Call Trace: [ 332.378809] process_one_work+0x1a1/0x350 [ 332.380806] worker_thread+0x30/0x380 [ 332.382777] ? wq_update_unbound_numa+0x10/0x10 [ 332.384772] kthread+0x112/0x130 [ 332.386740] ? kthread_create_worker_on_cpu+0x70/0x70 [ 332.388706] ret_from_fork+0x35/0x40 [ 332.390651] Modules linked in: i915(O) vfat fat joydev btusb btrtl btbcm btintel bluetooth ecdh_generic iTCO_wdt wmi_bmof i2c_algo_bit drm_kms_helper intel_rapl syscopyarea sysfillrect x86_pkg_temp_thermal sysimgblt coretemp fb_sys_fops crc32_pclmul drm psmouse pcspkr mei_me mei i2c_i801 lpc_ich mfd_core i2c_core tpm_tis tpm_tis_core thinkpad_acpi wmi tpm rfkill video crc32c_intel serio_raw ehci_pci xhci_pci ehci_hcd xhci_hcd [last unloaded: i915] [ 332.394963] CR2: 00000000000000ec This appears to be due to the fact that with an MST topology, not all intel_connector structs will have ->encoder set. So, fix this by skipping connectors without encoders in intel_hpd_irq_storm_reenable_work(). For those wondering, this bug was found on accident while simulating HPD storms using a Chamelium connected to a ThinkPad T450s (Broadwell). Changes since v1: - Check intel_connector->mst_port instead of intel_connector->encoder Signed-off-by: Lyude Paul Reviewed-by: Ville Syrjälä Cc: stable@vger.kernel.org Cc: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-3-lyude@redhat.com (cherry picked from commit fee61deecb1d850bf34f682a6a452e5ee51b7572) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_hotplug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 648a13c6043c..8326900a311e 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -228,7 +228,9 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) drm_for_each_connector_iter(connector, &conn_iter) { struct intel_connector *intel_connector = to_intel_connector(connector); - if (intel_connector->encoder->hpd_pin == pin) { + /* Don't check MST ports, they don't have pins */ + if (!intel_connector->mst_port && + intel_connector->encoder->hpd_pin == pin) { if (connector->polled != intel_connector->polled) DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n", connector->name); -- cgit v1.2.3-59-g8ed1b From c4f224076d00ccf30c7bd3561cceaed82628c8ce Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 2 Nov 2018 20:22:00 +0200 Subject: drm/i915/icl: Fix power well 2 wrt. DC-off toggling order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To enable DC5/6 power well 2 has to be disabled as for previous platforms, so fix things up. Bspec: 4234 Fixes: 67ca07e7ac10 ("drm/i915/icl: Add power well support") Cc: Animesh Manna Cc: Paulo Zanoni Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181102182200.17219-1-imre.deak@intel.com (cherry picked from commit a33e1ece777996ddddb1f23a30f8c66422ed0b68) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_runtime_pm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0fdabce647ab..0a4990d8843c 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2748,6 +2748,12 @@ static const struct i915_power_well_desc icl_power_wells[] = { .hsw.has_fuses = true, }, }, + { + .name = "DC off", + .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, + .ops = &gen9_dc_off_power_well_ops, + .id = DISP_PW_ID_NONE, + }, { .name = "power well 2", .domains = ICL_PW_2_POWER_DOMAINS, @@ -2759,12 +2765,6 @@ static const struct i915_power_well_desc icl_power_wells[] = { .hsw.has_fuses = true, }, }, - { - .name = "DC off", - .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS, - .ops = &gen9_dc_off_power_well_ops, - .id = DISP_PW_ID_NONE, - }, { .name = "power well 3", .domains = ICL_PW_3_POWER_DOMAINS, -- cgit v1.2.3-59-g8ed1b From 0a823e8fd4fd67726697854578f3584ee3a49b1d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Nov 2018 08:17:38 +0000 Subject: drm/i915/execlists: Force write serialisation into context image vs execution Ensure that the writes into the context image are completed prior to the register mmio to trigger execution. Although previously we were assured by the SDM that all writes are flushed before an uncached memory transaction (our mmio write to submit the context to HW for execution), we have empirical evidence to believe that this is not actually the case. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108656 References: https://bugs.freedesktop.org/show_bug.cgi?id=108315 References: https://bugs.freedesktop.org/show_bug.cgi?id=106887 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20181108081740.25615-1-chris@chris-wilson.co.uk Cc: stable@vger.kernel.org (cherry picked from commit 987abd5c62f92ee4970b45aa077f47949974e615) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 43957bb37a42..37c94a54efcb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -424,7 +424,8 @@ static u64 execlists_update_context(struct i915_request *rq) reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); - /* True 32b PPGTT with dynamic page allocation: update PDP + /* + * True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. @@ -432,6 +433,17 @@ static u64 execlists_update_context(struct i915_request *rq) if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm)) execlists_update_context_pdps(ppgtt, reg_state); + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + */ + wmb(); return ce->lrc_desc; } -- cgit v1.2.3-59-g8ed1b From 44a7276b30c3c15f2b7790a5729640597fb6a1df Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 12 Nov 2018 15:36:16 +0200 Subject: drm/i915: Fix hpd handling for pins with two encoders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In my haste to remove irq_port[] I accidentally changed the way we deal with hpd pins that are shared by multiple encoders (DP and HDMI for pre-DDI platforms). Previously we would only handle such pins via ->hpd_pulse(), but now we queue up the hotplug work for the HDMI encoder directly. Worse yet, we now count each hpd twice and this increment the hpd storm count twice as fast. This can lead to spurious storms being detected. Go back to the old way of doing things, ie. delegate to ->hpd_pulse() for any pin which has an encoder with that hook implemented. I don't really like the idea of adding irq_port[] back so let's loop through the encoders first to check if we have an encoder with ->hpd_pulse() for the pin, and then go through all the pins and decided on the correct course of action based on the earlier findings. I have occasionally toyed with the idea of unifying the pre-DDI HDMI and DP encoders into a single encoder as well. Besides the hotplug processing it would have the other benefit of preventing userspace from trying to enable both encoders at the same time. That is simply illegal as they share the same clock/data pins. We have some testcases that will attempt that and thus fail on many older machines. But for now let's stick to fixing just the hotplug code. Cc: stable@vger.kernel.org # 4.19+ Cc: Lyude Paul Cc: Rodrigo Vivi Fixes: b6ca3eee18ba ("drm/i915: Nuke dev_priv->irq_port[]") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181108200424.28371-1-ville.syrjala@linux.intel.com Reviewed-by: Lyude Paul (cherry picked from commit 5a3aeca97af1b6b3498d59a7fd4e8bb95814c108) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_hotplug.c | 66 +++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 8326900a311e..9a8018130237 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -397,37 +397,54 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, struct intel_encoder *encoder; bool storm_detected = false; bool queue_dig = false, queue_hp = false; + u32 long_hpd_pulse_mask = 0; + u32 short_hpd_pulse_mask = 0; + enum hpd_pin pin; if (!pin_mask) return; spin_lock(&dev_priv->irq_lock); + + /* + * Determine whether ->hpd_pulse() exists for each pin, and + * whether we have a short or a long pulse. This is needed + * as each pin may have up to two encoders (HDMI and DP) and + * only the one of them (DP) will have ->hpd_pulse(). + */ for_each_intel_encoder(&dev_priv->drm, encoder) { - enum hpd_pin pin = encoder->hpd_pin; bool has_hpd_pulse = intel_encoder_has_hpd_pulse(encoder); + enum port port = encoder->port; + bool long_hpd; + pin = encoder->hpd_pin; if (!(BIT(pin) & pin_mask)) continue; - if (has_hpd_pulse) { - bool long_hpd = long_mask & BIT(pin); - enum port port = encoder->port; + if (!has_hpd_pulse) + continue; - DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port), - long_hpd ? "long" : "short"); - /* - * For long HPD pulses we want to have the digital queue happen, - * but we still want HPD storm detection to function. - */ - queue_dig = true; - if (long_hpd) { - dev_priv->hotplug.long_port_mask |= (1 << port); - } else { - /* for short HPD just trigger the digital queue */ - dev_priv->hotplug.short_port_mask |= (1 << port); - continue; - } + long_hpd = long_mask & BIT(pin); + + DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port), + long_hpd ? "long" : "short"); + queue_dig = true; + + if (long_hpd) { + long_hpd_pulse_mask |= BIT(pin); + dev_priv->hotplug.long_port_mask |= BIT(port); + } else { + short_hpd_pulse_mask |= BIT(pin); + dev_priv->hotplug.short_port_mask |= BIT(port); } + } + + /* Now process each pin just once */ + for_each_hpd_pin(pin) { + bool long_hpd; + + if (!(BIT(pin) & pin_mask)) + continue; if (dev_priv->hotplug.stats[pin].state == HPD_DISABLED) { /* @@ -444,11 +461,22 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, if (dev_priv->hotplug.stats[pin].state != HPD_ENABLED) continue; - if (!has_hpd_pulse) { + /* + * Delegate to ->hpd_pulse() if one of the encoders for this + * pin has it, otherwise let the hotplug_work deal with this + * pin directly. + */ + if (((short_hpd_pulse_mask | long_hpd_pulse_mask) & BIT(pin))) { + long_hpd = long_hpd_pulse_mask & BIT(pin); + } else { dev_priv->hotplug.event_bits |= BIT(pin); + long_hpd = true; queue_hp = true; } + if (!long_hpd) + continue; + if (intel_hpd_irq_storm_detect(dev_priv, pin)) { dev_priv->hotplug.event_bits &= ~BIT(pin); storm_detected = true; -- cgit v1.2.3-59-g8ed1b From 18e962ac0781bcb70d433de3b2a325ff792b4288 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 12 Nov 2018 00:08:46 -0800 Subject: kyber: fix wrong strlcpy() size in trace_kyber_latency() When copying to the latency type, we should be passing LATENCY_TYPE_LEN, not DOMAIN_LEN (this isn't a problem in practice because we only pass "total" or "I/O"). Fix it by changing all of the strlcpy() calls to use sizeof(). Fixes: 6c3b7af1c975 ("kyber: add tracepoints") Reported-by: Jordan Glover Tested-by: Jordan Glover Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/trace/events/kyber.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h index a9834c37ac40..c0e7d24ca256 100644 --- a/include/trace/events/kyber.h +++ b/include/trace/events/kyber.h @@ -31,8 +31,8 @@ TRACE_EVENT(kyber_latency, TP_fast_assign( __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); - strlcpy(__entry->domain, domain, DOMAIN_LEN); - strlcpy(__entry->type, type, DOMAIN_LEN); + strlcpy(__entry->domain, domain, sizeof(__entry->domain)); + strlcpy(__entry->type, type, sizeof(__entry->type)); __entry->percentile = percentile; __entry->numerator = numerator; __entry->denominator = denominator; @@ -60,7 +60,7 @@ TRACE_EVENT(kyber_adjust, TP_fast_assign( __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); - strlcpy(__entry->domain, domain, DOMAIN_LEN); + strlcpy(__entry->domain, domain, sizeof(__entry->domain)); __entry->depth = depth; ), @@ -82,7 +82,7 @@ TRACE_EVENT(kyber_throttled, TP_fast_assign( __entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent))); - strlcpy(__entry->domain, domain, DOMAIN_LEN); + strlcpy(__entry->domain, domain, sizeof(__entry->domain)); ), TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), -- cgit v1.2.3-59-g8ed1b From 77e461d14ed141253573eeeb4d34eccc51e38328 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Sun, 11 Nov 2018 18:27:34 -0800 Subject: bnx2x: Assign unique DMAE channel number for FW DMAE transactions. Driver assigns DMAE channel 0 for FW as part of START_RAMROD command. FW uses this channel for DMAE operations (e.g., TIME_SYNC implementation). Driver also uses the same channel 0 for DMAE operations for some of the PFs (e.g., PF0 on Port0). This could lead to concurrent access to the DMAE channel by FW and driver which is not legal. Hence need to assign unique DMAE id for FW. Currently following DMAE channels are used by the clients, MFW - OCBB/OCSD functionality uses DMAE channel 14/15 Driver 0-3 and 8-11 (for PF dmae operations) 4 and 12 (for stats requests) Assigning unique dmae_id '13' to the FW. Changes from previous version: ------------------------------ v2: Incorporated the review comments. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 7 +++++++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index be1506169076..0de487a8f0eb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -2191,6 +2191,13 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, #define PMF_DMAE_C(bp) (BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \ E1HVN_MAX) +/* Following is the DMAE channel number allocation for the clients. + * MFW: OCBB/OCSD implementations use DMAE channels 14/15 respectively. + * Driver: 0-3 and 8-11 (for PF dmae operations) + * 4 and 12 (for stats requests) + */ +#define BNX2X_FW_DMAE_C 13 /* Channel for FW DMAE operations */ + /* PCIE link and speed */ #define PCICFG_LINK_WIDTH 0x1f00000 #define PCICFG_LINK_WIDTH_SHIFT 20 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 3f4d2c8da21a..a9eaaf3e73a4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -6149,6 +6149,7 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp, rdata->sd_vlan_tag = cpu_to_le16(start_params->sd_vlan_tag); rdata->path_id = BP_PATH(bp); rdata->network_cos_mode = start_params->network_cos_mode; + rdata->dmae_cmd_id = BNX2X_FW_DMAE_C; rdata->vxlan_dst_port = cpu_to_le16(start_params->vxlan_dst_port); rdata->geneve_dst_port = cpu_to_le16(start_params->geneve_dst_port); -- cgit v1.2.3-59-g8ed1b From ca474b73896bf6e0c1eb8787eb217b0f80221610 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 12 Nov 2018 10:35:25 -0700 Subject: block: copy ioprio in __bio_clone_fast() and bounce We need to copy the io priority, too; otherwise the clone will run with a different priority than the original one. Fixes: 43b62ce3ff0a ("block: move bio io prio to a new field") Signed-off-by: Hannes Reinecke Signed-off-by: Jean Delvare Fixed up subject, and ordered stores. Signed-off-by: Jens Axboe --- block/bio.c | 1 + block/bounce.c | 1 + 2 files changed, 2 insertions(+) diff --git a/block/bio.c b/block/bio.c index a50d59236b19..4f4d9884443b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -605,6 +605,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) if (bio_flagged(bio_src, BIO_THROTTLED)) bio_set_flag(bio, BIO_THROTTLED); bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; diff --git a/block/bounce.c b/block/bounce.c index 36869afc258c..559c55bda040 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -248,6 +248,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, return NULL; bio->bi_disk = bio_src->bi_disk; bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; bio->bi_write_hint = bio_src->bi_write_hint; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; -- cgit v1.2.3-59-g8ed1b From 410b5c7b48368317af95f0113692561d01d8144e Mon Sep 17 00:00:00 2001 From: Diego Viola Date: Mon, 12 Nov 2018 17:22:52 -0200 Subject: libata: blacklist SAMSUNG MZ7TD256HAFV-000L9 SSD med_power_with_dipm still causes freezes after updating the firmware to the latest version (DXT04L5Q). Set model_rev to NULL and blacklist the device. Cc: stable@vger.kernel.org Signed-off-by: Diego Viola Reviewed-by: Hans de Goede Signed-off-by: Jens Axboe --- drivers/ata/libata-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6e594644cb1d..a7f5202a4815 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4553,7 +4553,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* These specific Samsung models/firmware-revs do not handle LPM well */ { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, - { "SAMSUNG MZ7TD256HAFV-000L9", "DXT02L5Q", ATA_HORKAGE_NOLPM, }, + { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, }, /* devices that don't properly handle queued TRIM commands */ { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | -- cgit v1.2.3-59-g8ed1b From 5581c670fb7ec267fc79215c6d5176b07e5f6dad Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Mon, 12 Nov 2018 11:19:24 -0500 Subject: drm/amdgpu: set system aperture to cover whole FB region MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In XGMI configuration, the FB region covers vram region from peer device, adjust system aperture to cover all of them Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: shaoyunl Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index ceb7847b504f..bfa317ad20a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -72,7 +72,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) /* @@ -82,11 +82,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * to get rid of the VM fault and hardware hang. */ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max((adev->gmc.vram_end >> 18) + 0x1, + max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index fd23ba1226a5..a0db67adc34c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -90,7 +90,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) /* @@ -100,11 +100,11 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) * to get rid of the VM fault and hardware hang. */ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max((adev->gmc.vram_end >> 18) + 0x1, + max((adev->gmc.fb_end >> 18) + 0x1, adev->gmc.agp_end >> 18)); else WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + -- cgit v1.2.3-59-g8ed1b From 21a446cf186570168b7281b154b1993968598aca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Nov 2018 11:10:50 -0500 Subject: NFSv4: Don't exit the state manager without clearing NFS4CLNT_MANAGER_RUNNING If we exit the NFSv4 state manager due to a umount, then we can end up leaving the NFS4CLNT_MANAGER_RUNNING flag set. If another mount causes the nfs4_client to be rereferenced before it is destroyed, then we end up never being able to recover state. Fixes: 47c2199b6eb5 ("NFSv4.1: Ensure state manager thread dies on last ...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.15+ --- fs/nfs/nfs4state.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 62ae0fd345ad..98d1b6a6646a 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2601,11 +2601,12 @@ static void nfs4_state_manager(struct nfs_client *clp) nfs4_clear_state_manager_bit(clp); /* Did we race with an attempt to give us more work? */ if (clp->cl_state == 0) - break; + return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) - break; + return; } while (refcount_read(&clp->cl_count) > 1); - return; + goto out_drain; + out_error: if (strlen(section)) section_sep = ": "; @@ -2613,6 +2614,7 @@ out_error: " with error %d\n", section_sep, section, clp->cl_hostname, -status); ssleep(1); +out_drain: nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); } -- cgit v1.2.3-59-g8ed1b From a1aa09be21fa344d1f5585aab8164bfae55f57e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Nov 2018 12:17:01 -0500 Subject: NFSv4: Ensure that the state manager exits the loop on SIGKILL Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 98d1b6a6646a..ffea57885394 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2604,7 +2604,7 @@ static void nfs4_state_manager(struct nfs_client *clp) return; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; - } while (refcount_read(&clp->cl_count) > 1); + } while (refcount_read(&clp->cl_count) > 1 && !signalled()); goto out_drain; out_error: -- cgit v1.2.3-59-g8ed1b From a652a4bc21695a57c3b8d13d222a6f8b41f100aa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Nov 2018 15:30:52 -0500 Subject: SUNRPC: Fix a Oops when destroying the RPCSEC_GSS credential cache Commit 07d02a67b7fa causes a use-after free in the RPCSEC_GSS credential destroy code, because the call to get_rpccred() in gss_destroying_context() will now always fail to increment the refcount. While we could just replace the get_rpccred() with a refcount_set(), that would have the unfortunate consequence of resurrecting a credential in the credential cache for which we are in the process of destroying the RPCSEC_GSS context. Rather than do this, we choose to make a copy that is never added to the cache and use that to destroy the context. Fixes: 07d02a67b7fa ("SUNRPC: Simplify lookup code") Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 61 +++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 30f970cdc7f6..5d3f252659f1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1239,36 +1239,59 @@ gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) return &gss_auth->rpc_auth; } +static struct gss_cred * +gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) +{ + struct gss_cred *new; + + /* Make a copy of the cred so that we can reference count it */ + new = kzalloc(sizeof(*gss_cred), GFP_NOIO); + if (new) { + struct auth_cred acred = { + .uid = gss_cred->gc_base.cr_uid, + }; + struct gss_cl_ctx *ctx = + rcu_dereference_protected(gss_cred->gc_ctx, 1); + + rpcauth_init_cred(&new->gc_base, &acred, + &gss_auth->rpc_auth, + &gss_nullops); + new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + new->gc_service = gss_cred->gc_service; + new->gc_principal = gss_cred->gc_principal; + kref_get(&gss_auth->kref); + rcu_assign_pointer(new->gc_ctx, ctx); + gss_get_ctx(ctx); + } + return new; +} + /* - * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call + * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call * to the server with the GSS control procedure field set to * RPC_GSS_PROC_DESTROY. This should normally cause the server to release * all RPCSEC_GSS state associated with that context. */ -static int -gss_destroying_context(struct rpc_cred *cred) +static void +gss_send_destroy_context(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); + struct gss_cred *new; struct rpc_task *task; - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - return 0; - - ctx->gc_proc = RPC_GSS_PROC_DESTROY; - cred->cr_ops = &gss_nullops; - - /* Take a reference to ensure the cred will be destroyed either - * by the RPC call or by the put_rpccred() below */ - get_rpccred(cred); + new = gss_dup_cred(gss_auth, gss_cred); + if (new) { + ctx->gc_proc = RPC_GSS_PROC_DESTROY; - task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT); - if (!IS_ERR(task)) - rpc_put_task(task); + task = rpc_call_null(gss_auth->client, &new->gc_base, + RPC_TASK_ASYNC|RPC_TASK_SOFT); + if (!IS_ERR(task)) + rpc_put_task(task); - put_rpccred(cred); - return 1; + put_rpccred(&new->gc_base); + } } /* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure @@ -1330,8 +1353,8 @@ static void gss_destroy_cred(struct rpc_cred *cred) { - if (gss_destroying_context(cred)) - return; + if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) + gss_send_destroy_context(cred); gss_destroy_nullcred(cred); } -- cgit v1.2.3-59-g8ed1b From e3d5e573a54dabdc0f9f3cb039d799323372b251 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 12 Nov 2018 16:06:51 -0500 Subject: SUNRPC: Fix a bogus get/put in generic_key_to_expire() Signed-off-by: Trond Myklebust --- net/sunrpc/auth_generic.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index d8831b988b1e..ab4a3be1542a 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred) { struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; - bool ret; - - get_rpccred(cred); - ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); - put_rpccred(cred); - - return ret; + return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags); } static const struct rpc_credops generic_credops = { -- cgit v1.2.3-59-g8ed1b From 4ab49461d9d9b8274cc72d8656fe685ed394b8f7 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 1 Nov 2018 10:40:33 +0530 Subject: RISC-V: defconfig: Enable printk timestamps The printk timestamps are very useful information to visually see where kernel is spending time during boot. It also helps us see the timing of hotplug events at runtime. This patch enables printk timestamps in RISC-V defconfig so that we have it enabled by default (similar to other architectures such as x86_64, arm64, etc). Signed-off-by: Anup Patel Acked-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 07fa9ea75fea..ef4f15df9adf 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -76,4 +76,5 @@ CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_PRINTK_TIME=y # CONFIG_RCU_TRACE is not set -- cgit v1.2.3-59-g8ed1b From 10febb3ecace4b557eaa0d52c9d2c3531c1a715a Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Mon, 5 Nov 2018 15:40:04 +0100 Subject: riscv: fix spacing in struct pt_regs Replace 8 spaces with tab to match styling. Signed-off-by: David Abdurachmanov Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 2c5df945d43c..bbe1862e8f80 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -56,8 +56,8 @@ struct pt_regs { unsigned long sstatus; unsigned long sbadaddr; unsigned long scause; - /* a0 value before the syscall */ - unsigned long orig_a0; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #ifdef CONFIG_64BIT -- cgit v1.2.3-59-g8ed1b From f157d411a9eb170d2ee6b766da7a381962017cc9 Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Mon, 5 Nov 2018 15:35:37 +0100 Subject: riscv: add missing vdso_install target Building kernel 4.20 for Fedora as RPM fails, because riscv is missing vdso_install target in arch/riscv/Makefile. Signed-off-by: David Abdurachmanov Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index d10146197533..4af153a182b0 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -77,4 +77,8 @@ core-y += arch/riscv/kernel/ arch/riscv/mm/ libs-y += arch/riscv/lib/ +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ + all: vmlinux -- cgit v1.2.3-59-g8ed1b From 85d90b91807bb0c4a0fcff6a144e73f11cda782a Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 30 Oct 2018 23:47:07 -0700 Subject: RISC-V: lib: Fix build error for 64-bit Fixes the following build error from tinyconfig: riscv64-unknown-linux-gnu-ld: kernel/sched/fair.o: in function `.L8': fair.c:(.text+0x70): undefined reference to `__lshrti3' riscv64-unknown-linux-gnu-ld: kernel/time/clocksource.o: in function `.L0 ': clocksource.c:(.text+0x334): undefined reference to `__lshrti3' Fixes: 7f47c73b355f ("RISC-V: Build tishift only on 64-bit") Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 5739bd05d289..4e2e600f7d53 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,6 @@ lib-y += memcpy.o lib-y += memset.o lib-y += uaccess.o -lib-(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_32BIT) += udivdi3.o -- cgit v1.2.3-59-g8ed1b From ef3a61406618291c46da168ff91acaa28d85944c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 30 Oct 2018 23:47:09 -0700 Subject: RISC-V: Silence some module warnings on 32-bit Fixes: arch/riscv/kernel/module.c: In function 'apply_r_riscv_32_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:23:27: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_pcrel_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:104:23: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:146:23: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_got_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:190:60: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_plt_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:214:24: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:236:23: note: format string is defined here Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 3303ed2cd419..7dd308129b40 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -21,7 +21,7 @@ static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value %016llx out of range for 32-bit field\n", - me->name, v); + me->name, (long long)v); return -EINVAL; } *location = v; @@ -102,7 +102,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, if (offset != (s32)offset) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -144,7 +144,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, if (IS_ENABLED(CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -188,7 +188,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -212,7 +212,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } } @@ -234,7 +234,7 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, if (offset != fill_v) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From adf59dd2408c4536d490bee649784f0465562444 Mon Sep 17 00:00:00 2001 From: Jorge Ramirez-Ortiz Date: Mon, 12 Nov 2018 19:41:09 +0100 Subject: drm/meson: venc: dmt mode must use encp The video mode for DMT is only populated to support encp. Signed-off-by: Jorge Ramirez-Ortiz Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/1542048069-22603-1-git-send-email-jramirez@baylibre.com --- drivers/gpu/drm/meson/meson_venc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c index 514245e69b38..acbbad3e322c 100644 --- a/drivers/gpu/drm/meson/meson_venc.c +++ b/drivers/gpu/drm/meson/meson_venc.c @@ -854,6 +854,13 @@ void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic, unsigned int sof_lines; unsigned int vsync_lines; + /* Use VENCI for 480i and 576i and double HDMI pixels */ + if (mode->flags & DRM_MODE_FLAG_DBLCLK) { + hdmi_repeat = true; + use_enci = true; + venc_hdmi_latency = 1; + } + if (meson_venc_hdmi_supported_vic(vic)) { vmode = meson_venc_hdmi_get_vic_vmode(vic); if (!vmode) { @@ -865,13 +872,7 @@ void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic, } else { meson_venc_hdmi_get_dmt_vmode(mode, &vmode_dmt); vmode = &vmode_dmt; - } - - /* Use VENCI for 480i and 576i and double HDMI pixels */ - if (mode->flags & DRM_MODE_FLAG_DBLCLK) { - hdmi_repeat = true; - use_enci = true; - venc_hdmi_latency = 1; + use_enci = false; } /* Repeat VENC pixels for 480/576i/p, 720p50/60 and 1080p50/60 */ -- cgit v1.2.3-59-g8ed1b From 0d76bcc960e6057750fcf556b65da13f8bbdfd2b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 13 Nov 2018 08:38:17 -0600 Subject: Revert "ACPI/PCI: Pay attention to device-specific _PXM node values" This reverts commit bad7dcd94f3956bcfc0a69ef71fdf0fcca3de4a8. bad7dcd94f39 ("ACPI/PCI: Pay attention to device-specific _PXM node values") caused boot failures (no console output at all) for Martin [1] and Ingo [2] on AMD ThreadRipper systems. Revert the commit until we figure out how to safely use these device-specific _PXM values. [1] https://lore.kernel.org/linux-pci/20180912152140.3676-2-Jonathan.Cameron@huawei.com [2] https://lore.kernel.org/linux-pci/20181113071712.GA2353@gmail.com Fixes: bad7dcd94f39 ("ACPI/PCI: Pay attention to device-specific _PXM node values") Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-acpi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 2a4aa6468579..921db6f80340 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -793,15 +793,10 @@ static void pci_acpi_setup(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct acpi_device *adev = ACPI_COMPANION(dev); - int node; if (!adev) return; - node = acpi_get_node(adev->handle); - if (node != NUMA_NO_NODE) - set_dev_node(dev, node); - pci_acpi_optimize_delay(pci_dev, adev->handle); pci_acpi_add_pm_notifier(adev, pci_dev); -- cgit v1.2.3-59-g8ed1b From c837243ff4017f493c7d6f4ab57278d812a86859 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 12 Nov 2018 14:00:45 -0500 Subject: drm/amdgpu: fix bug with IH ring setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bug limits the IH ring wptr address to 40bit. When the system memory is bigger than 1TB, the bus address is more than 40bit, this causes the interrupt cannot be handled and cleared correctly. Reviewed-by: Christian König Signed-off-by: Philip Yang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index a99f71797aa3..a0fda6f9252a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -129,7 +129,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) else wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF); /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); -- cgit v1.2.3-59-g8ed1b From 4d454e9ffdb1ef5a51ebc147b5389c96048db683 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Tue, 13 Nov 2018 11:15:56 +0800 Subject: drm/amd/pp: Fix truncated clock value when set watermark the clk value should be tranferred to MHz first and then transfer to uint16. otherwise, the clock value will be truncated. Reviewed-by: Alex Deucher Reported-by: Hersen Wu Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c index 99a33c33a32c..101c09b212ad 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c @@ -713,20 +713,20 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table, for (i = 0; i < wm_with_clock_ranges->num_wm_dmif_sets; i++) { table->WatermarkRow[1][i].MinClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz / + 1000)); table->WatermarkRow[1][i].MaxClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz / + 1000)); table->WatermarkRow[1][i].MinUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz / + 1000)); table->WatermarkRow[1][i].MaxUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz / + 1000)); table->WatermarkRow[1][i].WmSetting = (uint8_t) wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_set_id; } @@ -734,20 +734,20 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table, for (i = 0; i < wm_with_clock_ranges->num_wm_mcif_sets; i++) { table->WatermarkRow[0][i].MinClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz / + 1000)); table->WatermarkRow[0][i].MaxClock = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz / + 1000)); table->WatermarkRow[0][i].MinUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz / + 1000)); table->WatermarkRow[0][i].MaxUclk = cpu_to_le16((uint16_t) - (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz) / - 1000); + (wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz / + 1000)); table->WatermarkRow[0][i].WmSetting = (uint8_t) wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id; } -- cgit v1.2.3-59-g8ed1b From c1a17777eb45d9f3821f35e9869c0a08cd2e664e Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 12 Nov 2018 18:08:31 +0100 Subject: drm/amdgpu: fix huge page handling on Vega10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We accidentially set the huge flag on the parent instead of the childs. This caused some VM faults under memory pressure. Signed-off-by: Christian König Acked-by: Alex Deucher Tested-by: Samuel Pitoiset Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 352b30409060..dad0e2342df9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1632,13 +1632,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; } - /* First check if the entry is already handled */ - if (cursor.pfn < frag_start) { - cursor.entry->huge = true; - amdgpu_vm_pt_next(adev, &cursor); - continue; - } - /* If it isn't already handled it can't be a huge page */ if (cursor.entry->huge) { /* Add the entry to the relocated list to update it. */ @@ -1701,8 +1694,17 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, } } while (frag_start < entry_end); - if (frag >= shift) + if (amdgpu_vm_pt_descendant(adev, &cursor)) { + /* Mark all child entries as huge */ + while (cursor.pfn < frag_start) { + cursor.entry->huge = true; + amdgpu_vm_pt_next(adev, &cursor); + } + + } else if (frag >= shift) { + /* or just move on to the next on the same level. */ amdgpu_vm_pt_next(adev, &cursor); + } } return 0; -- cgit v1.2.3-59-g8ed1b From c138325fb8713472d5a0c3c7258b9131bab40725 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 13 Nov 2018 16:16:08 +0100 Subject: selinux: check length properly in SCTP bind hook selinux_sctp_bind_connect() must verify if the address buffer has sufficient length before accessing the 'sa_family' field. See __sctp_connect() for a similar check. The length of the whole address ('len') is already checked in the callees. Reported-by: Qian Cai Fixes: d452930fd3b9 ("selinux: Add SCTP support") Cc: # 4.17+ Cc: Richard Haines Signed-off-by: Ondrej Mosnacek Tested-by: Qian Cai Signed-off-by: Paul Moore --- security/selinux/hooks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 18b98b5e1e3c..fe251c6f09f1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5318,6 +5318,9 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, addr_buf = address; while (walk_size < addrlen) { + if (walk_size + sizeof(sa_family_t) > addrlen) + return -EINVAL; + addr = addr_buf; switch (addr->sa_family) { case AF_UNSPEC: -- cgit v1.2.3-59-g8ed1b From 9aaa4e8ba12972d674caeefbc5f88d83235dd697 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 12 Nov 2018 12:50:20 +0200 Subject: qed: Fix PTT leak in qed_drain() Release PTT before entering error flow. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 35fd0db6a677..fff7f04d4525 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1782,9 +1782,9 @@ static int qed_drain(struct qed_dev *cdev) return -EBUSY; } rc = qed_mcp_drain(hwfn, ptt); + qed_ptt_release(hwfn, ptt); if (rc) return rc; - qed_ptt_release(hwfn, ptt); } return 0; -- cgit v1.2.3-59-g8ed1b From e90202ed1cf9672c48a363c84a929932ebfe6fc0 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 12 Nov 2018 12:50:21 +0200 Subject: qed: Fix overriding offload_tc by protocols without APP TLV The TC received from APP TLV is stored in offload_tc, and should not be set by protocols which did not receive an APP TLV. Fixed the condition when overriding the offload_tc. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 8e8fa823d611..69966dfc6e3d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -191,7 +191,7 @@ qed_dcbx_dp_protocol(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data) static void qed_dcbx_set_params(struct qed_dcbx_results *p_data, struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - bool enable, u8 prio, u8 tc, + bool app_tlv, bool enable, u8 prio, u8 tc, enum dcbx_protocol_type type, enum qed_pci_personality personality) { @@ -210,7 +210,7 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data, p_data->arr[type].dont_add_vlan0 = true; /* QM reconf data */ - if (p_hwfn->hw_info.personality == personality) + if (app_tlv && p_hwfn->hw_info.personality == personality) qed_hw_info_set_offload_tc(&p_hwfn->hw_info, tc); /* Configure dcbx vlan priority in doorbell block for roce EDPM */ @@ -225,7 +225,7 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data, static void qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - bool enable, u8 prio, u8 tc, + bool app_tlv, bool enable, u8 prio, u8 tc, enum dcbx_protocol_type type) { enum qed_pci_personality personality; @@ -240,7 +240,7 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, personality = qed_dcbx_app_update[i].personality; - qed_dcbx_set_params(p_data, p_hwfn, p_ptt, enable, + qed_dcbx_set_params(p_data, p_hwfn, p_ptt, app_tlv, enable, prio, tc, type, personality); } } @@ -319,8 +319,8 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, enable = true; } - qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, enable, - priority, tc, type); + qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, true, + enable, priority, tc, type); } } @@ -341,7 +341,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, continue; enable = (type == DCBX_PROTOCOL_ETH) ? false : !!dcbx_version; - qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, enable, + qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, false, enable, priority, tc, type); } -- cgit v1.2.3-59-g8ed1b From 291d57f67d2449737d1e370ab5b9a583818eaa0c Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 12 Nov 2018 12:50:22 +0200 Subject: qed: Fix rdma_info structure allocation Certain flows need to access the rdma-info structure, for example dcbx update flows. In some cases there can be a race between the allocation or deallocation of the structure which was done in roce start / roce stop and an asynchrounous dcbx event that tries to access the structure. For this reason, we move the allocation of the rdma_info structure to be similar to the iscsi/fcoe info structures which are allocated during device setup. We add a new field of "active" to the struct to define whether roce has already been started or not, and this is checked instead of whether the pointer to the info structure. Fixes: 51ff17251c9c ("qed: Add support for RoCE hw init") Signed-off-by: Michal Kalderon Signed-off-by: Denis Bolotin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 15 ++++++--- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 50 +++++++++++++++++------------- drivers/net/ethernet/qlogic/qed/qed_rdma.h | 5 +++ 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 7ceb2b97538d..cff141077558 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -185,6 +185,10 @@ void qed_resc_free(struct qed_dev *cdev) qed_iscsi_free(p_hwfn); qed_ooo_free(p_hwfn); } + + if (QED_IS_RDMA_PERSONALITY(p_hwfn)) + qed_rdma_info_free(p_hwfn); + qed_iov_free(p_hwfn); qed_l2_free(p_hwfn); qed_dmae_info_free(p_hwfn); @@ -1081,6 +1085,12 @@ int qed_resc_alloc(struct qed_dev *cdev) goto alloc_err; } + if (QED_IS_RDMA_PERSONALITY(p_hwfn)) { + rc = qed_rdma_info_alloc(p_hwfn); + if (rc) + goto alloc_err; + } + /* DMA info initialization */ rc = qed_dmae_info_alloc(p_hwfn); if (rc) @@ -2102,11 +2112,8 @@ int qed_hw_start_fastpath(struct qed_hwfn *p_hwfn) if (!p_ptt) return -EAGAIN; - /* If roce info is allocated it means roce is initialized and should - * be enabled in searcher. - */ if (p_hwfn->p_rdma_info && - p_hwfn->b_rdma_enabled_in_prs) + p_hwfn->p_rdma_info->active && p_hwfn->b_rdma_enabled_in_prs) qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0x1); /* Re-open incoming traffic */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 62113438c880..7873d6dfd91f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -140,22 +140,34 @@ static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id; } -static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_rdma_start_in_params *params) +int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn) { struct qed_rdma_info *p_rdma_info; - u32 num_cons, num_tasks; - int rc = -ENOMEM; - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n"); - - /* Allocate a struct with current pf rdma info */ p_rdma_info = kzalloc(sizeof(*p_rdma_info), GFP_KERNEL); if (!p_rdma_info) - return rc; + return -ENOMEM; + + spin_lock_init(&p_rdma_info->lock); p_hwfn->p_rdma_info = p_rdma_info; + return 0; +} + +void qed_rdma_info_free(struct qed_hwfn *p_hwfn) +{ + kfree(p_hwfn->p_rdma_info); + p_hwfn->p_rdma_info = NULL; +} + +static int qed_rdma_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 num_cons, num_tasks; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n"); + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) p_rdma_info->proto = PROTOCOLID_IWARP; else @@ -183,7 +195,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, /* Allocate a struct with device params and fill it */ p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL); if (!p_rdma_info->dev) - goto free_rdma_info; + return rc; /* Allocate a struct with port params and fill it */ p_rdma_info->port = kzalloc(sizeof(*p_rdma_info->port), GFP_KERNEL); @@ -298,8 +310,6 @@ free_rdma_port: kfree(p_rdma_info->port); free_rdma_dev: kfree(p_rdma_info->dev); -free_rdma_info: - kfree(p_rdma_info); return rc; } @@ -370,8 +380,6 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) kfree(p_rdma_info->port); kfree(p_rdma_info->dev); - - kfree(p_rdma_info); } static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) @@ -679,8 +687,6 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn, DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA setup\n"); - spin_lock_init(&p_hwfn->p_rdma_info->lock); - qed_rdma_init_devinfo(p_hwfn, params); qed_rdma_init_port(p_hwfn); qed_rdma_init_events(p_hwfn, params); @@ -727,7 +733,7 @@ static int qed_rdma_stop(void *rdma_cxt) /* Disable RoCE search */ qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0); p_hwfn->b_rdma_enabled_in_prs = false; - + p_hwfn->p_rdma_info->active = 0; qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); @@ -1236,7 +1242,8 @@ qed_rdma_create_qp(void *rdma_cxt, u8 max_stats_queues; int rc; - if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) { + if (!rdma_cxt || !in_params || !out_params || + !p_hwfn->p_rdma_info->active) { DP_ERR(p_hwfn->cdev, "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", rdma_cxt, in_params, out_params); @@ -1802,8 +1809,8 @@ bool qed_rdma_allocated_qps(struct qed_hwfn *p_hwfn) { bool result; - /* if rdma info has not been allocated, naturally there are no qps */ - if (!p_hwfn->p_rdma_info) + /* if rdma wasn't activated yet, naturally there are no qps */ + if (!p_hwfn->p_rdma_info->active) return false; spin_lock_bh(&p_hwfn->p_rdma_info->lock); @@ -1849,7 +1856,7 @@ static int qed_rdma_start(void *rdma_cxt, if (!p_ptt) goto err; - rc = qed_rdma_alloc(p_hwfn, p_ptt, params); + rc = qed_rdma_alloc(p_hwfn); if (rc) goto err1; @@ -1858,6 +1865,7 @@ static int qed_rdma_start(void *rdma_cxt, goto err2; qed_ptt_release(p_hwfn, p_ptt); + p_hwfn->p_rdma_info->active = 1; return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h index 6f722ee8ee94..50d609c0e108 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h @@ -102,6 +102,7 @@ struct qed_rdma_info { u16 max_queue_zones; enum protocol_type proto; struct qed_iwarp_info iwarp; + u8 active:1; }; struct qed_rdma_qp { @@ -176,10 +177,14 @@ struct qed_rdma_qp { #if IS_ENABLED(CONFIG_QED_RDMA) void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn); +void qed_rdma_info_free(struct qed_hwfn *p_hwfn); #else static inline void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +static inline int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn) {return -EINVAL} +static inline void qed_rdma_info_free(struct qed_hwfn *p_hwfn) {} #endif int -- cgit v1.2.3-59-g8ed1b From ed4eac20dcffdad47709422e0cb925981b056668 Mon Sep 17 00:00:00 2001 From: Denis Bolotin Date: Mon, 12 Nov 2018 12:50:23 +0200 Subject: qed: Fix reading wrong value in loop condition The value of "sb_index" is written by the hardware. Reading its value and writing it to "index" must finish before checking the loop condition. Signed-off-by: Denis Bolotin Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_int.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 0f0aba793352..b22f464ea3fa 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -992,6 +992,8 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) */ do { index = p_sb_attn->sb_index; + /* finish reading index before the loop condition */ + dma_rmb(); attn_bits = le32_to_cpu(p_sb_attn->atten_bits); attn_acks = le32_to_cpu(p_sb_attn->atten_ack); } while (index != p_sb_attn->sb_index); -- cgit v1.2.3-59-g8ed1b From dded2e159208a9edc21dd5c5f583afa28d378d39 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Sep 2018 17:17:49 +0000 Subject: kdb: use correct pointer when 'btc' calls 'btt' On a powerpc 8xx, 'btc' fails as follows: Entering kdb (current=0x(ptrval), pid 282) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0x0 when booting the kernel with 'debug_boot_weak_hash', it fails as well Entering kdb (current=0xba99ad80, pid 284) due to Keyboard Entry kdb> btc btc: cpu status: Currently on cpu 0 Available cpus: 0 kdb_getarea: Bad address 0xba99ad80 On other platforms, Oopses have been observed too, see https://github.com/linuxppc/linux/issues/139 This is due to btc calling 'btt' with %p pointer as an argument. This patch replaces %p by %px to get the real pointer value as expected by 'btt' Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: Signed-off-by: Christophe Leroy Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_bt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 6ad4a9fcbd6f..7921ae4fca8d 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -179,14 +179,14 @@ kdb_bt(int argc, const char **argv) kdb_printf("no process for cpu %ld\n", cpu); return 0; } - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); return 0; } kdb_printf("btc: cpu status: "); kdb_parse("cpu\n"); for_each_online_cpu(cpu) { - sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu)); + sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu)); kdb_parse(buf); touch_nmi_watchdog(); } -- cgit v1.2.3-59-g8ed1b From 568fb6f42ac6851320adaea25f8f1b94de14e40a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 27 Sep 2018 17:17:57 +0000 Subject: kdb: print real address of pointers instead of hashed addresses Since commit ad67b74d2469 ("printk: hash addresses printed with %p"), all pointers printed with %p are printed with hashed addresses instead of real addresses in order to avoid leaking addresses in dmesg and syslog. But this applies to kdb too, with is unfortunate: Entering kdb (current=0x(ptrval), pid 329) due to Keyboard Entry kdb> ps 15 sleeping system daemon (state M) processes suppressed, use 'ps A' to see all. Task Addr Pid Parent [*] cpu State Thread Command 0x(ptrval) 329 328 1 0 R 0x(ptrval) *sh 0x(ptrval) 1 0 0 0 S 0x(ptrval) init 0x(ptrval) 3 2 0 0 D 0x(ptrval) rcu_gp 0x(ptrval) 4 2 0 0 D 0x(ptrval) rcu_par_gp 0x(ptrval) 5 2 0 0 D 0x(ptrval) kworker/0:0 0x(ptrval) 6 2 0 0 D 0x(ptrval) kworker/0:0H 0x(ptrval) 7 2 0 0 D 0x(ptrval) kworker/u2:0 0x(ptrval) 8 2 0 0 D 0x(ptrval) mm_percpu_wq 0x(ptrval) 10 2 0 0 D 0x(ptrval) rcu_preempt The whole purpose of kdb is to debug, and for debugging real addresses need to be known. In addition, data displayed by kdb doesn't go into dmesg. This patch replaces all %p by %px in kdb in order to display real addresses. Fixes: ad67b74d2469 ("printk: hash addresses printed with %p") Cc: Signed-off-by: Christophe Leroy Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_main.c | 14 +++++++------- kernel/debug/kdb/kdb_support.c | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index bb4fe4e1a601..959242084b40 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1192,7 +1192,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, if (reason == KDB_REASON_DEBUG) { /* special case below */ } else { - kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", + kdb_printf("\nEntering kdb (current=0x%px, pid %d) ", kdb_current, kdb_current ? kdb_current->pid : 0); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -1208,7 +1208,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, */ switch (db_result) { case KDB_DB_BPT: - kdb_printf("\nEntering kdb (0x%p, pid %d) ", + kdb_printf("\nEntering kdb (0x%px, pid %d) ", kdb_current, kdb_current->pid); #if defined(CONFIG_SMP) kdb_printf("on processor %d ", raw_smp_processor_id()); @@ -2048,7 +2048,7 @@ static int kdb_lsmod(int argc, const char **argv) if (mod->state == MODULE_STATE_UNFORMED) continue; - kdb_printf("%-20s%8u 0x%p ", mod->name, + kdb_printf("%-20s%8u 0x%px ", mod->name, mod->core_layout.size, (void *)mod); #ifdef CONFIG_MODULE_UNLOAD kdb_printf("%4d ", module_refcount(mod)); @@ -2059,7 +2059,7 @@ static int kdb_lsmod(int argc, const char **argv) kdb_printf(" (Loading)"); else kdb_printf(" (Live)"); - kdb_printf(" 0x%p", mod->core_layout.base); + kdb_printf(" 0x%px", mod->core_layout.base); #ifdef CONFIG_MODULE_UNLOAD { @@ -2341,7 +2341,7 @@ void kdb_ps1(const struct task_struct *p) return; cpu = kdb_process_cpu(p); - kdb_printf("0x%p %8d %8d %d %4d %c 0x%p %c%s\n", + kdb_printf("0x%px %8d %8d %d %4d %c 0x%px %c%s\n", (void *)p, p->pid, p->parent->pid, kdb_task_has_cpu(p), kdb_process_cpu(p), kdb_task_state_char(p), @@ -2354,7 +2354,7 @@ void kdb_ps1(const struct task_struct *p) } else { if (KDB_TSK(cpu) != p) kdb_printf(" Error: does not match running " - "process table (0x%p)\n", KDB_TSK(cpu)); + "process table (0x%px)\n", KDB_TSK(cpu)); } } } @@ -2687,7 +2687,7 @@ int kdb_register_flags(char *cmd, for_each_kdbcmd(kp, i) { if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { kdb_printf("Duplicate kdb command registered: " - "%s, func %p help %s\n", cmd, func, help); + "%s, func %px help %s\n", cmd, func, help); return 1; } } diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 990b3cc526c8..987eb73284d2 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -40,7 +40,7 @@ int kdbgetsymval(const char *symname, kdb_symtab_t *symtab) { if (KDB_DEBUG(AR)) - kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname, + kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname, symtab); memset(symtab, 0, sizeof(*symtab)); symtab->sym_start = kallsyms_lookup_name(symname); @@ -88,7 +88,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) char *knt1 = NULL; if (KDB_DEBUG(AR)) - kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab); + kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab); memset(symtab, 0, sizeof(*symtab)); if (addr < 4096) @@ -149,7 +149,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) symtab->mod_name = "kernel"; if (KDB_DEBUG(AR)) kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, " - "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret, + "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret, symtab->sym_start, symtab->mod_name, symtab->sym_name, symtab->sym_name); @@ -887,13 +887,13 @@ void debug_kusage(void) __func__, dah_first); if (dah_first) { h_used = (struct debug_alloc_header *)debug_alloc_pool; - kdb_printf("%s: h_used %p size %d\n", __func__, h_used, + kdb_printf("%s: h_used %px size %d\n", __func__, h_used, h_used->size); } do { h_used = (struct debug_alloc_header *) ((char *)h_free + dah_overhead + h_free->size); - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); h_free = (struct debug_alloc_header *) (debug_alloc_pool + h_free->next); @@ -902,7 +902,7 @@ void debug_kusage(void) ((char *)h_free + dah_overhead + h_free->size); if ((char *)h_used - debug_alloc_pool != sizeof(debug_alloc_pool_aligned)) - kdb_printf("%s: h_used %p size %d caller %p\n", + kdb_printf("%s: h_used %px size %d caller %px\n", __func__, h_used, h_used->size, h_used->caller); out: spin_unlock(&dap_lock); -- cgit v1.2.3-59-g8ed1b From c2b94c72d93d0929f48157eef128c4f9d2e603ce Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 20 Sep 2018 08:59:14 -0400 Subject: kdb: Use strscpy with destination buffer size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 8.1.0 warns with: kernel/debug/kdb/kdb_support.c: In function ‘kallsyms_symbol_next’: kernel/debug/kdb/kdb_support.c:239:4: warning: ‘strncpy’ specified bound depends on the length of the source argument [-Wstringop-overflow=] strncpy(prefix_name, name, strlen(name)+1); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/debug/kdb/kdb_support.c:239:31: note: length computed here Use strscpy() with the destination buffer size, and use ellipses when displaying truncated symbols. v2: Use strscpy() Signed-off-by: Prarit Bhargava Cc: Jonathan Toppins Cc: Jason Wessel Cc: Daniel Thompson Cc: kgdb-bugreport@lists.sourceforge.net Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_io.c | 15 +++++++++------ kernel/debug/kdb/kdb_private.h | 2 +- kernel/debug/kdb/kdb_support.c | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index ed5d34925ad0..6a4b41484afe 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize) int count; int i; int diag, dtab_count; - int key; + int key, buf_size, ret; diag = kdbgetintenv("DTABCOUNT", &dtab_count); @@ -336,9 +336,8 @@ poll_again: else p_tmp = tmpbuffer; len = strlen(p_tmp); - count = kallsyms_symbol_complete(p_tmp, - sizeof(tmpbuffer) - - (p_tmp - tmpbuffer)); + buf_size = sizeof(tmpbuffer) - (p_tmp - tmpbuffer); + count = kallsyms_symbol_complete(p_tmp, buf_size); if (tab == 2 && count > 0) { kdb_printf("\n%d symbols are found.", count); if (count > dtab_count) { @@ -350,9 +349,13 @@ poll_again: } kdb_printf("\n"); for (i = 0; i < count; i++) { - if (WARN_ON(!kallsyms_symbol_next(p_tmp, i))) + ret = kallsyms_symbol_next(p_tmp, i, buf_size); + if (WARN_ON(!ret)) break; - kdb_printf("%s ", p_tmp); + if (ret != -E2BIG) + kdb_printf("%s ", p_tmp); + else + kdb_printf("%s... ", p_tmp); *(p_tmp + len) = '\0'; } if (i >= dtab_count) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 1e5a502ba4a7..2118d8258b7c 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -83,7 +83,7 @@ typedef struct __ksymtab { unsigned long sym_start; unsigned long sym_end; } kdb_symtab_t; -extern int kallsyms_symbol_next(char *prefix_name, int flag); +extern int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size); extern int kallsyms_symbol_complete(char *prefix_name, int max_len); /* Exported Symbols for kernel loadable modules to use. */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 987eb73284d2..b14b0925c184 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -221,11 +221,13 @@ int kallsyms_symbol_complete(char *prefix_name, int max_len) * Parameters: * prefix_name prefix of a symbol name to lookup * flag 0 means search from the head, 1 means continue search. + * buf_size maximum length that can be written to prefix_name + * buffer * Returns: * 1 if a symbol matches the given prefix. * 0 if no string found */ -int kallsyms_symbol_next(char *prefix_name, int flag) +int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size) { int prefix_len = strlen(prefix_name); static loff_t pos; @@ -235,10 +237,8 @@ int kallsyms_symbol_next(char *prefix_name, int flag) pos = 0; while ((name = kdb_walk_kallsyms(&pos))) { - if (strncmp(name, prefix_name, prefix_len) == 0) { - strncpy(prefix_name, name, strlen(name)+1); - return 1; - } + if (!strncmp(name, prefix_name, prefix_len)) + return strscpy(prefix_name, name, buf_size); } return 0; } -- cgit v1.2.3-59-g8ed1b From 9eb62f0e1bc70ebc9b15837a0c4e8f12a7b910cb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 16 Aug 2018 09:01:41 -0500 Subject: kdb: kdb_main: refactor code in kdb_md_line Replace the whole switch statement with a for loop. This makes the code clearer and easy to read. This also addresses the following Coverity warnings: Addresses-Coverity-ID: 115090 ("Missing break in switch") Addresses-Coverity-ID: 115091 ("Missing break in switch") Addresses-Coverity-ID: 114700 ("Missing break in switch") Suggested-by: Daniel Thompson Signed-off-by: Gustavo A. R. Silva Reviewed-by: Daniel Thompson [daniel.thompson@linaro.org: Tiny grammar change in description] Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_main.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 959242084b40..d72b32c66f7d 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1493,6 +1493,7 @@ static void kdb_md_line(const char *fmtstr, unsigned long addr, char cbuf[32]; char *c = cbuf; int i; + int j; unsigned long word; memset(cbuf, '\0', sizeof(cbuf)); @@ -1538,25 +1539,9 @@ static void kdb_md_line(const char *fmtstr, unsigned long addr, wc.word = word; #define printable_char(c) \ ({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.'; }) - switch (bytesperword) { - case 8: + for (j = 0; j < bytesperword; j++) *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - addr += 4; - case 4: - *c++ = printable_char(*cp++); - *c++ = printable_char(*cp++); - addr += 2; - case 2: - *c++ = printable_char(*cp++); - addr++; - case 1: - *c++ = printable_char(*cp++); - addr++; - break; - } + addr += bytesperword; #undef printable_char } } -- cgit v1.2.3-59-g8ed1b From 01cb37351bafc1b44b962842926210115e231f0a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 4 Aug 2018 23:18:25 -0500 Subject: kdb: kdb_keyboard: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case, I replaced the code comments with a proper "fall through" annotation, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_keyboard.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c index 118527aa60ea..750497b0003a 100644 --- a/kernel/debug/kdb/kdb_keyboard.c +++ b/kernel/debug/kdb/kdb_keyboard.c @@ -173,11 +173,11 @@ int kdb_get_kbd_char(void) case KT_LATIN: if (isprint(keychar)) break; /* printable characters */ - /* drop through */ + /* fall through */ case KT_SPEC: if (keychar == K_ENTER) break; - /* drop through */ + /* fall through */ default: return -1; /* ignore unprintables */ } -- cgit v1.2.3-59-g8ed1b From 646558ff1643467d3b941b47f519867cbca462c3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 4 Aug 2018 21:48:44 -0500 Subject: kdb: kdb_support: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case, I replaced the code comments with a proper "fall through" annotation, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Daniel Thompson Signed-off-by: Daniel Thompson --- kernel/debug/kdb/kdb_support.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index b14b0925c184..50bf9b119bad 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -432,7 +432,7 @@ int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size) *word = w8; break; } - /* drop through */ + /* fall through */ default: diag = KDB_BADWIDTH; kdb_printf("kdb_getphysword: bad width %ld\n", (long) size); @@ -481,7 +481,7 @@ int kdb_getword(unsigned long *word, unsigned long addr, size_t size) *word = w8; break; } - /* drop through */ + /* fall through */ default: diag = KDB_BADWIDTH; kdb_printf("kdb_getword: bad width %ld\n", (long) size); @@ -525,7 +525,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size) diag = kdb_putarea(addr, w8); break; } - /* drop through */ + /* fall through */ default: diag = KDB_BADWIDTH; kdb_printf("kdb_putword: bad width %ld\n", (long) size); -- cgit v1.2.3-59-g8ed1b From fd35f192e42cf7c0df1e2480bfd5965e35b2f4ca Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 9 Nov 2018 00:53:40 -0500 Subject: integrity: support new struct public_key_signature encoding field On systems with IMA-appraisal enabled with a policy requiring file signatures, the "good" signature values are stored on the filesystem as extended attributes (security.ima). Signature verification failure would normally be limited to just a particular file (eg. executable), but during boot signature verification failure could result in a system hang. Defining and requiring a new public_key_signature field requires all callers of asymmetric signature verification to be updated to reflect the change. This patch updates the integrity asymmetric_verify() caller. Fixes: 82f94f24475c ("KEYS: Provide software public key query function [ver #2]") Signed-off-by: Mimi Zohar Cc: David Howells Acked-by: Denis Kenzior Signed-off-by: James Morris --- security/integrity/digsig_asymmetric.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index 6dc075144508..d775e03fbbcc 100644 --- a/security/integrity/digsig_asymmetric.c +++ b/security/integrity/digsig_asymmetric.c @@ -106,6 +106,7 @@ int asymmetric_verify(struct key *keyring, const char *sig, pks.pkey_algo = "rsa"; pks.hash_algo = hash_algo_name[hdr->hash_algo]; + pks.encoding = "pkcs1"; pks.digest = (u8 *)data; pks.digest_size = datalen; pks.s = hdr->sig; -- cgit v1.2.3-59-g8ed1b From e39d8a186ed002854196668cb7562ffdfbc6d379 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Nov 2018 16:37:54 -0500 Subject: NFSv4: Fix an Oops during delegation callbacks If the server sends a CB_GETATTR or a CB_RECALL while the filesystem is being unmounted, then we can Oops when releasing the inode in nfs4_callback_getattr() and nfs4_callback_recall(). Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 4 ++-- fs/nfs/delegation.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index fa515d5ea5ba..7b861bbc0b43 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -66,7 +66,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, out_iput: rcu_read_unlock(); trace_nfs4_cb_getattr(cps->clp, &args->fh, inode, -ntohl(res->status)); - iput(inode); + nfs_iput_and_deactive(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); return res->status; @@ -108,7 +108,7 @@ __be32 nfs4_callback_recall(void *argp, void *resp, } trace_nfs4_cb_recall(cps->clp, &args->fh, inode, &args->stateid, -ntohl(res)); - iput(inode); + nfs_iput_and_deactive(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); return res; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 07b839560576..6ec2f78c1e19 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -850,16 +850,23 @@ nfs_delegation_find_inode_server(struct nfs_server *server, const struct nfs_fh *fhandle) { struct nfs_delegation *delegation; - struct inode *res = NULL; + struct inode *freeme, *res = NULL; list_for_each_entry_rcu(delegation, &server->delegations, super_list) { spin_lock(&delegation->lock); if (delegation->inode != NULL && nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { - res = igrab(delegation->inode); + freeme = igrab(delegation->inode); + if (freeme && nfs_sb_active(freeme->i_sb)) + res = freeme; spin_unlock(&delegation->lock); if (res != NULL) return res; + if (freeme) { + rcu_read_unlock(); + iput(freeme); + rcu_read_lock(); + } return ERR_PTR(-EAGAIN); } spin_unlock(&delegation->lock); -- cgit v1.2.3-59-g8ed1b From 877181a8d9dc663f7a73f77f50af714d7888ec3b Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 13 Nov 2018 21:44:33 -0500 Subject: selinux: fix non-MLS handling in mls_context_to_sid() Commit 95ffe194204a ("selinux: refactor mls_context_to_sid() and make it stricter") inadvertently changed how we handle labels that did not contain MLS information. This patch restores the proper behavior in mls_context_to_sid() and adds a comment explaining the proper behavior to help ensure this doesn't happen again. Fixes: 95ffe194204a ("selinux: refactor mls_context_to_sid() and make it stricter") Reported-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/ss/mls.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index 2fe459df3c85..b7efa2296969 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -245,9 +245,13 @@ int mls_context_to_sid(struct policydb *pol, char *rangep[2]; if (!pol->mls_enabled) { - if ((def_sid != SECSID_NULL && oldc) || (*scontext) == '\0') - return 0; - return -EINVAL; + /* + * With no MLS, only return -EINVAL if there is a MLS field + * and it did not come from an xattr. + */ + if (oldc && def_sid == SECSID_NULL) + return -EINVAL; + return 0; } /* -- cgit v1.2.3-59-g8ed1b From 437ccdc8ce629470babdda1a7086e2f477048cbd Mon Sep 17 00:00:00 2001 From: Satheesh Rajendran Date: Thu, 8 Nov 2018 10:47:56 +0530 Subject: powerpc/numa: Suppress "VPHN is not supported" messages When VPHN function is not supported and during cpu hotplug event, kernel prints message 'VPHN function not supported. Disabling polling...'. Currently it prints on every hotplug event, it floods dmesg when a KVM guest tries to hotplug huge number of vcpus, let's just print once and suppress further kernel prints. Signed-off-by: Satheesh Rajendran Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3a048e98a132..ce28ae5ca080 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break; -- cgit v1.2.3-59-g8ed1b From 40dc948f234b73497c3278875eb08a01d5854d3f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 13 Nov 2018 23:46:42 -0800 Subject: xtensa: fix boot parameters address translation The bootloader may pass physical address of the boot parameters structure to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in the arch/xtensa/kernel/head.S is supposed to map that physical address to the virtual address in the configured virtual memory layout. This code haven't been updated when additional 256+256 and 512+512 memory layouts were introduced and it may produce wrong addresses when used with these layouts. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/head.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 2f76118ecf62..9053a5622d2c 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -88,9 +88,12 @@ _SetupMMU: initialize_mmu #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY rsr a2, excsave1 - movi a3, 0x08000000 + movi a3, XCHAL_KSEG_PADDR + bltu a2, a3, 1f + sub a2, a2, a3 + movi a3, XCHAL_KSEG_SIZE bgeu a2, a3, 1f - movi a3, 0xd0000000 + movi a3, XCHAL_KSEG_CACHED_VADDR add a2, a2, a3 wsr a2, excsave1 1: -- cgit v1.2.3-59-g8ed1b From 6a67a20366f894c172734f28c5646bdbe48a46e3 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Mon, 12 Nov 2018 12:39:31 +0000 Subject: drm/i915: fix broadwell EU computation subslice_mask is an array indexed by slice, not subslice. Signed-off-by: Lionel Landwerlin Fixes: 8cc7669355136f ("drm/i915: store all subslice masks") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108712 Reviewed-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20181112123931.2815-1-lionel.g.landwerlin@intel.com (cherry picked from commit 63ac3328f0d1d37f286e397b14d9596ed09d7ca5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 0ef0c6448d53..01fa98299bae 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -474,7 +474,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) u8 eu_disabled_mask; u32 n_disabled; - if (!(sseu->subslice_mask[ss] & BIT(ss))) + if (!(sseu->subslice_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; -- cgit v1.2.3-59-g8ed1b From a22612301ae61d78a7c0c82dc556931a35db0e91 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 9 Nov 2018 16:09:23 +0200 Subject: drm/i915/icl: Drop spurious register read from icl_dbuf_slices_update Register DBUF_CTL_S2 is read and it's value is not used. As there is no explanation why we should prime the hardware with read, remove it as spurious. Fixes: aa9664ffe863 ("drm/i915/icl: Enable 2nd DBuf slice only when needed") Cc: Mahesh Kumar Cc: Rodrigo Vivi Signed-off-by: Mika Kuoppala Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20181109140924.2663-1-mika.kuoppala@linux.intel.com (cherry picked from commit 8577c319b6511fbc391f3775225fecd8b979bc26) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_runtime_pm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0a4990d8843c..44e4491a4918 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -3176,8 +3176,7 @@ static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv) void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, u8 req_slices) { - u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; - u32 val; + const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; bool ret; if (req_slices > intel_dbuf_max_slices(dev_priv)) { @@ -3188,7 +3187,6 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, if (req_slices == hw_enabled_slices || req_slices == 0) return; - val = I915_READ(DBUF_CTL_S2); if (req_slices > hw_enabled_slices) ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true); else -- cgit v1.2.3-59-g8ed1b From 4800bf7bc8c725e955fcbc6191cc872f43f506d3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 14 Nov 2018 08:17:18 -0700 Subject: block: fix 32 bit overflow in __blkdev_issue_discard() A discard cleanup merged into 4.20-rc2 causes fstests xfs/259 to fall into an endless loop in the discard code. The test is creating a device that is exactly 2^32 sectors in size to test mkfs boundary conditions around the 32 bit sector overflow region. mkfs issues a discard for the entire device size by default, and hence this throws a sector count of 2^32 into blkdev_issue_discard(). It takes the number of sectors to discard as a sector_t - a 64 bit value. The commit ba5d73851e71 ("block: cleanup __blkdev_issue_discard") takes this sector count and casts it to a 32 bit value before comapring it against the maximum allowed discard size the device has. This truncates away the upper 32 bits, and so if the lower 32 bits of the sector count is zero, it starts issuing discards of length 0. This causes the code to fall into an endless loop, issuing a zero length discards over and over again on the same sector. Fixes: ba5d73851e71 ("block: cleanup __blkdev_issue_discard") Tested-by: Darrick J. Wong Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner Killed pointless WARN_ON(). Signed-off-by: Jens Axboe --- block/blk-lib.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index e8b3bb9bf375..5f2c429d4378 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -55,9 +55,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, return -EINVAL; while (nr_sects) { - unsigned int req_sects = min_t(unsigned int, nr_sects, + sector_t req_sects = min_t(sector_t, nr_sects, bio_allowed_max_sectors(q)); + WARN_ON_ONCE((req_sects << 9) > UINT_MAX); + bio = blk_next_bio(bio, 0, gfp_mask); bio->bi_iter.bi_sector = sector; bio_set_dev(bio, bdev); -- cgit v1.2.3-59-g8ed1b From 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 14 Nov 2018 16:25:51 +0800 Subject: SCSI: fix queue cleanup race before queue initialization is done c2856ae2f315d ("blk-mq: quiesce queue before freeing queue") has already fixed this race, however the implied synchronize_rcu() in blk_mq_quiesce_queue() can slow down LUN probe a lot, so caused performance regression. Then 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") tried to quiesce queue for avoiding unnecessary synchronize_rcu() only when queue initialization is done, because it is usual to see lots of inexistent LUNs which need to be probed. However, turns out it isn't safe to quiesce queue only when queue initialization is done. Because when one SCSI command is completed, the user of sending command can be waken up immediately, then the scsi device may be removed, meantime the run queue in scsi_end_request() is still in-progress, so kernel panic can be caused. In Red Hat QE lab, there are several reports about this kind of kernel panic triggered during kernel booting. This patch tries to address the issue by grabing one queue usage counter during freeing one request and the following run queue. Fixes: 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()") Cc: Andrew Jones Cc: Bart Van Assche Cc: linux-scsi@vger.kernel.org Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: James E.J. Bottomley Cc: stable Cc: jianchao.wang Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ++--- drivers/scsi/scsi_lib.c | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index ce12515f9b9b..deb56932f8c4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -798,9 +798,8 @@ void blk_cleanup_queue(struct request_queue *q) * dispatch may still be in-progress since we dispatch requests * from more than one contexts. * - * No need to quiesce queue if it isn't initialized yet since - * blk_freeze_queue() should be enough for cases of passthrough - * request. + * We rely on driver to deal with the race in case that queue + * initialization isn't done. */ if (q->mq_ops && blk_queue_init_done(q)) blk_mq_quiesce_queue(q); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index c7fccbb8f554..fa6e0c3b3aa6 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -697,6 +697,12 @@ static bool scsi_end_request(struct request *req, blk_status_t error, */ scsi_mq_uninit_cmd(cmd); + /* + * queue is still alive, so grab the ref for preventing it + * from being cleaned up during running queue. + */ + percpu_ref_get(&q->q_usage_counter); + __blk_mq_end_request(req, error); if (scsi_target(sdev)->single_lun || @@ -704,6 +710,8 @@ static bool scsi_end_request(struct request *req, blk_status_t error, kblockd_schedule_work(&sdev->requeue_work); else blk_mq_run_hw_queues(q, true); + + percpu_ref_put(&q->q_usage_counter); } else { unsigned long flags; -- cgit v1.2.3-59-g8ed1b From 007b656851ed7f94ba0fa358ac3e5d7705da6846 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 12 Nov 2018 17:06:12 +0100 Subject: s390/ism: clear dmbe_mask bit before SMC IRQ handling SMC-D stress workload showed connection stalls. Since the firmware decides to skip raising an interrupt if the SBA DMBE mask bit is still set, this SBA DMBE mask bit should be cleared before the IRQ handling in the SMC code runs. Otherwise there are small windows possible with missing interrupts for incoming data. SMC-D currently does not care about the old value of the SBA DMBE mask. Acked-by: Sebastian Ott Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- drivers/s390/net/ism_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index f96ec68af2e5..dcbf5c857743 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -415,9 +415,9 @@ static irqreturn_t ism_handle_irq(int irq, void *data) break; clear_bit_inv(bit, bv); + ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0; barrier(); smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET); - ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0; } if (ism->sba->e) { -- cgit v1.2.3-59-g8ed1b From 66f93c5a02d5ba6ef17fef459143961382593212 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 15 Nov 2018 12:34:27 +1000 Subject: powerpc/64: Fix kernel stack 16-byte alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than thread_struct") changed sizeof(struct pt_regs) % 16 from 0 to 8, which causes the interrupt frame allocation on kernel entry to put the kernel stack out of alignment. Quadword (16-byte) alignment for the stack is required by both the 64-bit v1 ABI (v1.9 § 3.2.2) and the 64-bit v2 ABI (v1.1 § 2.2.2.1). Add a pad field to fix alignment, and add a BUILD_BUG_ON to catch this in future. Fixes: 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than thread_struct") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ptrace.h | 1 + arch/powerpc/kernel/setup_64.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index f73886a1a7f5..0b8a735b6d85 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -54,6 +54,7 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; + unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */ #endif }; #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a51e4cc8246..236c1151a3a7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -636,6 +636,8 @@ static void *__init alloc_stack(unsigned long limit, int cpu) { unsigned long pa; + BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16); + pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, early_cpu_to_node(cpu), MEMBLOCK_NONE); if (!pa) { -- cgit v1.2.3-59-g8ed1b From f8504f4ca0a0e9f84546ef86e00b24d2ea9a0bd2 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 13 Nov 2018 01:08:25 +0800 Subject: l2tp: fix a sock refcnt leak in l2tp_tunnel_register This issue happens when trying to add an existent tunnel. It doesn't call sock_put() before returning -EEXIST to release the sock refcnt that was held by calling sock_hold() before the existence check. This patch is to fix it by holding the sock after doing the existence check. Fixes: f6cd651b056f ("l2tp: fix race in duplicate tunnel detection") Reported-by: Jianlin Shi Signed-off-by: Xin Long Reviewed-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 82cdf9020b53..26f1d435696a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1490,12 +1490,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, goto err_sock; } - sk = sock->sk; - - sock_hold(sk); - tunnel->sock = sk; tunnel->l2tp_net = net; - pn = l2tp_pernet(net); spin_lock_bh(&pn->l2tp_tunnel_list_lock); @@ -1510,6 +1505,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); + sk = sock->sk; + sock_hold(sk); + tunnel->sock = sk; + if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { struct udp_tunnel_sock_cfg udp_cfg = { .sk_user_data = tunnel, -- cgit v1.2.3-59-g8ed1b From ef1491e791308317bb9851a0ad380c4a68b58d54 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 14 Nov 2018 09:55:40 -0800 Subject: efi: Fix debugobjects warning on 'efi_rts_work' The following commit: 9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler") converted 'efi_rts_work' from an auto variable to a global variable. However, when submitting the work, INIT_WORK_ONSTACK() was still used, causing the following complaint from debugobjects: ODEBUG: object 00000000ed27b500 is NOT on stack 00000000c7d38760, but annotated. Change the macro to just INIT_WORK() to eliminate the warning. Signed-off-by: Waiman Long Signed-off-by: Ard Biesheuvel Acked-by: Sai Praneeth Prakhya Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler") Link: http://lkml.kernel.org/r/20181114175544.12860-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/runtime-wrappers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index a19d845bdb06..8903b9ccfc2b 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -67,7 +67,7 @@ struct efi_runtime_work efi_rts_work; } \ \ init_completion(&efi_rts_work.efi_rts_comp); \ - INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts); \ + INIT_WORK(&efi_rts_work.work, efi_call_rts); \ efi_rts_work.arg1 = _arg1; \ efi_rts_work.arg2 = _arg2; \ efi_rts_work.arg3 = _arg3; \ -- cgit v1.2.3-59-g8ed1b From 33412b8673135b18ea42beb7f5117ed0091798b6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:41 -0800 Subject: efi/arm: Revert deferred unmap of early memmap mapping Commit: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory map longer for BGRT") deferred the unmap of the early mapping of the UEFI memory map to accommodate the ACPI BGRT code, which looks up the memory type that backs the BGRT table to validate it against the requirements of the UEFI spec. Unfortunately, this causes problems on ARM, which does not permit early mappings to persist after paging_init() is called, resulting in a WARN() splat. Since we don't support the BGRT table on ARM anway, let's revert ARM to the old behaviour, which is to take down the early mapping at the end of efi_init(). Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory ...") Link: http://lkml.kernel.org/r/20181114175544.12860-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-init.c | 4 ++++ drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/memmap.c | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c index 388a929baf95..1a6a77df8a5e 100644 --- a/drivers/firmware/efi/arm-init.c +++ b/drivers/firmware/efi/arm-init.c @@ -265,6 +265,10 @@ void __init efi_init(void) (params.mmap & ~PAGE_MASK))); init_screen_info(); + + /* ARM does not permit early mappings to persist across paging_init() */ + if (IS_ENABLED(CONFIG_ARM)) + efi_memmap_unmap(); } static int __init register_gop_device(void) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 922cfb813109..a00934d263c5 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -110,7 +110,7 @@ static int __init arm_enable_runtime_services(void) { u64 mapsize; - if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) { + if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); return 0; } diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c index fa2904fb841f..38b686c67b17 100644 --- a/drivers/firmware/efi/memmap.c +++ b/drivers/firmware/efi/memmap.c @@ -118,6 +118,9 @@ int __init efi_memmap_init_early(struct efi_memory_map_data *data) void __init efi_memmap_unmap(void) { + if (!efi_enabled(EFI_MEMMAP)) + return; + if (!efi.memmap.late) { unsigned long size; -- cgit v1.2.3-59-g8ed1b From 72a58a63a164b4e9d2d914e65caeb551846883f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:42 -0800 Subject: efi/arm/libstub: Pack FDT after populating it Commit: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") increased the allocation size for the FDT image created by the stub to a fixed value of 2 MB, to simplify the former code that made several attempts with increasing values for the size. This is reasonable given that the allocation is of type EFI_LOADER_DATA, which is released to the kernel unless it is explicitly memblock_reserve()d by the early boot code. However, this allocation size leaked into the 'size' field of the FDT header metadata, and so the entire allocation remains occupied by the device tree binary, even if most of it is not used to store device tree information. So call fdt_pack() to shrink the FDT data structure to its minimum size after populating all the fields, so that the remaining memory is no longer wasted. Signed-off-by: Ard Biesheuvel Cc: # v4.12+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size") Link: http://lkml.kernel.org/r/20181114175544.12860-4-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/fdt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index 8830fa601e45..0c0d2312f4a8 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -158,6 +158,10 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, return efi_status; } } + + /* shrink the FDT back to its minimum size */ + fdt_pack(fdt); + return EFI_SUCCESS; fdt_set_fail: -- cgit v1.2.3-59-g8ed1b From eff896288872d687d9662000ec9ae11b6d61766f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:43 -0800 Subject: efi/arm: Defer persistent reservations until after paging_init() The new memory EFI reservation feature we introduced to allow memory reservations to persist across kexec may trigger an unbounded number of calls to memblock_reserve(). The memblock subsystem can deal with this fine, but not before memblock resizing is enabled, which we can only do after paging_init(), when the memory we reallocate the array into is actually mapped. So break out the memreserve table processing into a separate routine and call it after paging_init() on arm64. On ARM, because of limited reviewing bandwidth of the maintainer, we cannot currently fix this, so instead, disable the EFI persistent memreserve entirely on ARM so we can fix it later. Tested-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181114175544.12860-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/setup.c | 1 + drivers/firmware/efi/efi.c | 4 ++++ drivers/firmware/efi/libstub/arm-stub.c | 3 +++ include/linux/efi.h | 7 +++++++ 4 files changed, 15 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 953e316521fc..f4fc1e0544b7 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -313,6 +313,7 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init(); paging_init(); + efi_apply_persistent_mem_reservations(); acpi_table_upgrade(); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 249eb70691b0..72a4da76d274 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -592,7 +592,11 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz, early_memunmap(tbl, sizeof(*tbl)); } + return 0; +} +int __init efi_apply_persistent_mem_reservations(void) +{ if (efi.mem_reserve != EFI_INVALID_TABLE_ADDR) { unsigned long prsv = efi.mem_reserve; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 30ac0c975f8a..3d36142cf812 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -75,6 +75,9 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg) efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID; efi_status_t status; + if (IS_ENABLED(CONFIG_ARM)) + return; + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv), (void **)&rsv); if (status != EFI_SUCCESS) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 845174e113ce..100ce4a4aff6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1167,6 +1167,8 @@ static inline bool efi_enabled(int feature) extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); extern bool efi_is_table_address(unsigned long phys_addr); + +extern int efi_apply_persistent_mem_reservations(void); #else static inline bool efi_enabled(int feature) { @@ -1185,6 +1187,11 @@ static inline bool efi_is_table_address(unsigned long phys_addr) { return false; } + +static inline int efi_apply_persistent_mem_reservations(void) +{ + return 0; +} #endif extern int efi_status_to_err(efi_status_t status); -- cgit v1.2.3-59-g8ed1b From 63eb322d89c8505af9b4a3d703e85e42281ebaa0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:44 -0800 Subject: efi: Permit calling efi_mem_reserve_persistent() from atomic context Currently, efi_mem_reserve_persistent() may not be called from atomic context, since both the kmalloc() call and the memremap() call may sleep. The kmalloc() call is easy enough to fix, but the memremap() call needs to be moved into an init hook since we cannot control the memory allocation behavior of memremap() at the call site. Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181114175544.12860-6-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/efi.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 72a4da76d274..fad7c62cfc0e 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -967,36 +967,43 @@ bool efi_is_table_address(unsigned long phys_addr) } static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); +static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init; int efi_mem_reserve_persistent(phys_addr_t addr, u64 size) { - struct linux_efi_memreserve *rsv, *parent; + struct linux_efi_memreserve *rsv; - if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR) + if (!efi_memreserve_root) return -ENODEV; - rsv = kmalloc(sizeof(*rsv), GFP_KERNEL); + rsv = kmalloc(sizeof(*rsv), GFP_ATOMIC); if (!rsv) return -ENOMEM; - parent = memremap(efi.mem_reserve, sizeof(*rsv), MEMREMAP_WB); - if (!parent) { - kfree(rsv); - return -ENOMEM; - } - rsv->base = addr; rsv->size = size; spin_lock(&efi_mem_reserve_persistent_lock); - rsv->next = parent->next; - parent->next = __pa(rsv); + rsv->next = efi_memreserve_root->next; + efi_memreserve_root->next = __pa(rsv); spin_unlock(&efi_mem_reserve_persistent_lock); - memunmap(parent); + return 0; +} +static int __init efi_memreserve_root_init(void) +{ + if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR) + return -ENODEV; + + efi_memreserve_root = memremap(efi.mem_reserve, + sizeof(*efi_memreserve_root), + MEMREMAP_WB); + if (!efi_memreserve_root) + return -ENOMEM; return 0; } +early_initcall(efi_memreserve_root_init); #ifdef CONFIG_KEXEC static int update_efi_random_seed(struct notifier_block *nb, -- cgit v1.2.3-59-g8ed1b From 2a2777990a342b05f611e78822fc4fa2d9789ade Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 14 Nov 2018 13:49:23 +0200 Subject: drm/i915: Move programming plane scaler to its own function. This cleans the code up slightly, and will make other changes easier. Signed-off-by: Maarten Lankhorst Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20180920102711.4184-8-maarten.lankhorst@linux.intel.com (cherry picked from commit ab5c60bf76755d24ae8de5c1c6ac594934656ace) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_sprite.c | 90 +++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 5fd2f7bf3927..873adcc20109 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -302,13 +302,63 @@ skl_plane_max_stride(struct intel_plane *plane, return min(8192 * cpp, 32768); } +static void +skl_program_scaler(struct drm_i915_private *dev_priv, + struct intel_plane *plane, + const struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + enum plane_id plane_id = plane->id; + enum pipe pipe = plane->pipe; + int scaler_id = plane_state->scaler_id; + const struct intel_scaler *scaler = + &crtc_state->scaler_state.scalers[scaler_id]; + int crtc_x = plane_state->base.dst.x1; + int crtc_y = plane_state->base.dst.y1; + uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); + uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); + u16 y_hphase, uv_rgb_hphase; + u16 y_vphase, uv_rgb_vphase; + + /* Sizes are 0 based */ + crtc_w--; + crtc_h--; + + /* TODO: handle sub-pixel coordinates */ + if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) { + y_hphase = skl_scaler_calc_phase(1, false); + y_vphase = skl_scaler_calc_phase(1, false); + + /* MPEG2 chroma siting convention */ + uv_rgb_hphase = skl_scaler_calc_phase(2, true); + uv_rgb_vphase = skl_scaler_calc_phase(2, false); + } else { + /* not used */ + y_hphase = 0; + y_vphase = 0; + + uv_rgb_hphase = skl_scaler_calc_phase(1, false); + uv_rgb_vphase = skl_scaler_calc_phase(1, false); + } + + I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), + PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); + I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); + I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id), + PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase)); + I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id), + PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase)); + I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); + I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), + ((crtc_w + 1) << 16)|(crtc_h + 1)); +} + void skl_update_plane(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - const struct drm_framebuffer *fb = plane_state->base.fb; enum plane_id plane_id = plane->id; enum pipe pipe = plane->pipe; u32 plane_ctl = plane_state->ctl; @@ -318,8 +368,6 @@ skl_update_plane(struct intel_plane *plane, u32 aux_stride = skl_plane_stride(plane_state, 1); int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; - uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); - uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); uint32_t x = plane_state->color_plane[0].x; uint32_t y = plane_state->color_plane[0].y; uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; @@ -329,8 +377,6 @@ skl_update_plane(struct intel_plane *plane, /* Sizes are 0 based */ src_w--; src_h--; - crtc_w--; - crtc_h--; spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); @@ -355,39 +401,7 @@ skl_update_plane(struct intel_plane *plane, /* program plane scaler */ if (plane_state->scaler_id >= 0) { - int scaler_id = plane_state->scaler_id; - const struct intel_scaler *scaler = - &crtc_state->scaler_state.scalers[scaler_id]; - u16 y_hphase, uv_rgb_hphase; - u16 y_vphase, uv_rgb_vphase; - - /* TODO: handle sub-pixel coordinates */ - if (fb->format->format == DRM_FORMAT_NV12) { - y_hphase = skl_scaler_calc_phase(1, false); - y_vphase = skl_scaler_calc_phase(1, false); - - /* MPEG2 chroma siting convention */ - uv_rgb_hphase = skl_scaler_calc_phase(2, true); - uv_rgb_vphase = skl_scaler_calc_phase(2, false); - } else { - /* not used */ - y_hphase = 0; - y_vphase = 0; - - uv_rgb_hphase = skl_scaler_calc_phase(1, false); - uv_rgb_vphase = skl_scaler_calc_phase(1, false); - } - - I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), - PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); - I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); - I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id), - PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase)); - I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id), - PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase)); - I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); - I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), - ((crtc_w + 1) << 16)|(crtc_h + 1)); + skl_program_scaler(dev_priv, plane, crtc_state, plane_state); I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); } else { -- cgit v1.2.3-59-g8ed1b From 27971d613fcb5b6ad96320bc4f2c90f4d4fde768 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 14 Nov 2018 13:49:24 +0200 Subject: drm/i915: Clean up skl_program_scaler() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the "sizes are 0 based" stuff that is not even true for the scaler. v2: Rebase Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181101151736.20522-1-ville.syrjala@linux.intel.com Reviewed-by: Rodrigo Vivi (cherry picked from commit d0105af939769393d6447a04cee2d1ae12e3f09a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_sprite.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 873adcc20109..fa7eaace5f92 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -303,12 +303,11 @@ skl_plane_max_stride(struct intel_plane *plane, } static void -skl_program_scaler(struct drm_i915_private *dev_priv, - struct intel_plane *plane, +skl_program_scaler(struct intel_plane *plane, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { - enum plane_id plane_id = plane->id; + struct drm_i915_private *dev_priv = to_i915(plane->base.dev); enum pipe pipe = plane->pipe; int scaler_id = plane_state->scaler_id; const struct intel_scaler *scaler = @@ -320,10 +319,6 @@ skl_program_scaler(struct drm_i915_private *dev_priv, u16 y_hphase, uv_rgb_hphase; u16 y_vphase, uv_rgb_vphase; - /* Sizes are 0 based */ - crtc_w--; - crtc_h--; - /* TODO: handle sub-pixel coordinates */ if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) { y_hphase = skl_scaler_calc_phase(1, false); @@ -342,15 +337,14 @@ skl_program_scaler(struct drm_i915_private *dev_priv, } I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), - PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode); + PS_SCALER_EN | PS_PLANE_SEL(plane->id) | scaler->mode); I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0); I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id), PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase)); I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id), PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase)); I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y); - I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), - ((crtc_w + 1) << 16)|(crtc_h + 1)); + I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), (crtc_w << 16) | crtc_h); } void @@ -399,9 +393,8 @@ skl_update_plane(struct intel_plane *plane, (plane_state->color_plane[1].y << 16) | plane_state->color_plane[1].x); - /* program plane scaler */ if (plane_state->scaler_id >= 0) { - skl_program_scaler(dev_priv, plane, crtc_state, plane_state); + skl_program_scaler(plane, crtc_state, plane_state); I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0); } else { -- cgit v1.2.3-59-g8ed1b From 6e8adf6f4a4fa57dd3bef6b70de96e2b7b311204 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 14 Nov 2018 15:32:55 +0200 Subject: drm/i915: Account for scale factor when calculating initial phase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To get the initial phase correct we need to account for the scale factor as well. I forgot this initially and was mostly looking at heavily upscaled content where the minor difference between -0.5 and the proper initial phase was not readily apparent. And let's toss in a comment that tries to explain the formula a little bit. v2: The initial phase upper limit is 1.5, not 24.0! Cc: Maarten Lankhorst Fixes: 0a59952b24e2 ("drm/i915: Configure SKL+ scaler initial phase correctly") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20181029181820.21956-1-ville.syrjala@linux.intel.com Tested-by: Juha-Pekka Heikkila Tested-by: Maarten Lankhorst #irc Reviewed-by: Maarten Lankhorst #irc (cherry picked from commit e7a278a329dd8aa2c70c564849f164cb5673689c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_display.c | 45 +++++++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_sprite.c | 20 +++++++++++----- 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 23d8008a93bb..a54843fdeb2f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4850,8 +4850,31 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe) * chroma samples for both of the luma samples, and thus we don't * actually get the expected MPEG2 chroma siting convention :( * The same behaviour is observed on pre-SKL platforms as well. + * + * Theory behind the formula (note that we ignore sub-pixel + * source coordinates): + * s = source sample position + * d = destination sample position + * + * Downscaling 4:1: + * -0.5 + * | 0.0 + * | | 1.5 (initial phase) + * | | | + * v v v + * | s | s | s | s | + * | d | + * + * Upscaling 1:4: + * -0.5 + * | -0.375 (initial phase) + * | | 0.0 + * | | | + * v v v + * | s | + * | d | d | d | d | */ -u16 skl_scaler_calc_phase(int sub, bool chroma_cosited) +u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_cosited) { int phase = -0x8000; u16 trip = 0; @@ -4859,6 +4882,15 @@ u16 skl_scaler_calc_phase(int sub, bool chroma_cosited) if (chroma_cosited) phase += (sub - 1) * 0x8000 / sub; + phase += scale / (2 * sub); + + /* + * Hardware initial phase limited to [-0.5:1.5]. + * Since the max hardware scale factor is 3.0, we + * should never actually excdeed 1.0 here. + */ + WARN_ON(phase < -0x8000 || phase > 0x18000); + if (phase < 0) phase = 0x10000 + phase; else @@ -5067,13 +5099,20 @@ static void skylake_pfit_enable(struct intel_crtc *crtc) if (crtc->config->pch_pfit.enabled) { u16 uv_rgb_hphase, uv_rgb_vphase; + int pfit_w, pfit_h, hscale, vscale; int id; if (WARN_ON(crtc->config->scaler_state.scaler_id < 0)) return; - uv_rgb_hphase = skl_scaler_calc_phase(1, false); - uv_rgb_vphase = skl_scaler_calc_phase(1, false); + pfit_w = (crtc->config->pch_pfit.size >> 16) & 0xFFFF; + pfit_h = crtc->config->pch_pfit.size & 0xFFFF; + + hscale = (crtc->config->pipe_src_w << 16) / pfit_w; + vscale = (crtc->config->pipe_src_h << 16) / pfit_h; + + uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false); + uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false); id = scaler_state->scaler_id; I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN | diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f8dc84b2d2d3..8b298e5f012d 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1646,7 +1646,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -u16 skl_scaler_calc_phase(int sub, bool chroma_center); +u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(const struct intel_crtc_state *crtc_state, u32 pixel_format); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index fa7eaace5f92..d3090a7537bb 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -318,22 +318,30 @@ skl_program_scaler(struct intel_plane *plane, uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); u16 y_hphase, uv_rgb_hphase; u16 y_vphase, uv_rgb_vphase; + int hscale, vscale; + + hscale = drm_rect_calc_hscale(&plane_state->base.src, + &plane_state->base.dst, + 0, INT_MAX); + vscale = drm_rect_calc_vscale(&plane_state->base.src, + &plane_state->base.dst, + 0, INT_MAX); /* TODO: handle sub-pixel coordinates */ if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) { - y_hphase = skl_scaler_calc_phase(1, false); - y_vphase = skl_scaler_calc_phase(1, false); + y_hphase = skl_scaler_calc_phase(1, hscale, false); + y_vphase = skl_scaler_calc_phase(1, vscale, false); /* MPEG2 chroma siting convention */ - uv_rgb_hphase = skl_scaler_calc_phase(2, true); - uv_rgb_vphase = skl_scaler_calc_phase(2, false); + uv_rgb_hphase = skl_scaler_calc_phase(2, hscale, true); + uv_rgb_vphase = skl_scaler_calc_phase(2, vscale, false); } else { /* not used */ y_hphase = 0; y_vphase = 0; - uv_rgb_hphase = skl_scaler_calc_phase(1, false); - uv_rgb_vphase = skl_scaler_calc_phase(1, false); + uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false); + uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false); } I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id), -- cgit v1.2.3-59-g8ed1b From b2fed34a628df6118b5d4e13f49a33e15f704fa9 Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Wed, 14 Nov 2018 21:33:30 -0500 Subject: selftests/powerpc: Adjust wild_bctr to build with old binutils Currently the selftest wild_bctr can fail to build when an old gcc is used, notably on gcc using a binutils version <= 2.27, because the assembler does not support the integer suffix UL. This patch adjusts the wild_bctr test so the REG_POISON value is still treated as an unsigned long for the shifts on compilation but the UL suffix is absent on the stringification, so the inline asm code generated has no UL suffixes. Signed-off-by: Gustavo Romero [mpe: Wrap long line] Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/mm/wild_bctr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c index 90469a9e49d4..f2fa101c5a6a 100644 --- a/tools/testing/selftests/powerpc/mm/wild_bctr.c +++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c @@ -47,8 +47,9 @@ static int ok(void) return 0; } -#define REG_POISON 0x5a5aUL -#define POISONED_REG(n) ((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n)) +#define REG_POISON 0x5a5a +#define POISONED_REG(n) ((((unsigned long)REG_POISON) << 48) | ((n) << 32) | \ + (((unsigned long)REG_POISON) << 16) | (n)) static inline void poison_regs(void) { -- cgit v1.2.3-59-g8ed1b From 6ba990384e924476b5eed1734f3bcca0df6fd77e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 15 Nov 2018 03:25:37 -0500 Subject: bnxt_en: Fix RSS context allocation. Recent commit has added the reservation of RSS context. This requires bnxt_hwrm_vnic_qcaps() to be called before allocating any RSS contexts. The bnxt_hwrm_vnic_qcaps() call sets up proper flags that will determine how many RSS contexts to allocate to support NTUPLE. This causes a regression that too many RSS contexts are being reserved and causing resource shortage when enabling many VFs. Fix it by calling bnxt_hwrm_vnic_qcaps() earlier. Fixes: 41e8d7983752 ("bnxt_en: Modify the ring reservation functions for 57500 series chips.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dd85d790f638..4a45a2b809ea 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10087,6 +10087,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } bnxt_hwrm_func_qcfg(bp); + bnxt_hwrm_vnic_qcaps(bp); bnxt_hwrm_port_led_qcaps(bp); bnxt_ethtool_init(bp); bnxt_dcb_init(bp); @@ -10120,7 +10121,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6; } - bnxt_hwrm_vnic_qcaps(bp); if (bnxt_rfs_supported(bp)) { dev->hw_features |= NETIF_F_NTUPLE; if (bnxt_rfs_capable(bp)) { -- cgit v1.2.3-59-g8ed1b From d19819297d9284bd990e22116b8b43d0abcbf488 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 15 Nov 2018 03:25:38 -0500 Subject: bnxt_en: Fix rx_l4_csum_errors counter on 57500 devices. The software counter structure is defined in both the CP ring's structure and the NQ ring's structure on the new devices. The legacy code adds the counter to the CP ring's structure and the counter won't get displayed since the ethtool code is looking at the NQ ring's structure. Since all other counters are contained in the NQ ring's structure, it makes more sense to count rx_l4_csum_errors in the NQ. Fixes: 50e3ab7836b5 ("bnxt_en: Allocate completion ring structures for 57500 series chips.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4a45a2b809ea..585609990eee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1675,7 +1675,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } else { if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L4_CS_ERR_BITS) { if (dev->features & NETIF_F_RXCSUM) - cpr->rx_l4_csum_errors++; + bnapi->cp_ring.rx_l4_csum_errors++; } } -- cgit v1.2.3-59-g8ed1b From addd4df6d763556e16d5316e4e8cd441050cc2af Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 15 Nov 2018 03:25:39 -0500 Subject: bnxt_en: Disable RDMA support on the 57500 chips. There is no RDMA support on 57500 chips yet, so prevent bnxt_re from registering on these chips. There is intermittent failure if bnxt_re is allowed to register and proceed with RDMA operations. Fixes: 1ab968d2f1d6 ("bnxt_en: Add PCI ID for BCM57508 device.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index beee61292d5e..b59b382d34f9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -43,6 +43,9 @@ static int bnxt_register_dev(struct bnxt_en_dev *edev, int ulp_id, if (ulp_id == BNXT_ROCE_ULP) { unsigned int max_stat_ctxs; + if (bp->flags & BNXT_FLAG_CHIP_P5) + return -EOPNOTSUPP; + max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp); if (max_stat_ctxs <= BNXT_MIN_ROCE_STAT_CTXS || bp->num_stat_ctxs == max_stat_ctxs) -- cgit v1.2.3-59-g8ed1b From ffd77621700ec3adcf859681e24910c38e0931f5 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 15 Nov 2018 03:25:40 -0500 Subject: bnxt_en: Workaround occasional TX timeout on 57500 A0. Hardware can sometimes not generate NQ MSIX with a single pending CP ring entry. This seems to always happen at the last entry of the CP ring before it wraps. Add logic to check all the CP rings for pending entries without the CP ring consumer index advancing. Calling HWRM_DBG_RING_INFO_GET to read the context of the CP ring will flush out the NQ entry and MSIX. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 65 +++++++++++++++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 ++ 2 files changed, 68 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 585609990eee..5d4147a75cad 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8714,6 +8714,26 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features) return rc; } +static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type, + u32 ring_id, u32 *prod, u32 *cons) +{ + struct hwrm_dbg_ring_info_get_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_dbg_ring_info_get_input req = {0}; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_RING_INFO_GET, -1, -1); + req.ring_type = ring_type; + req.fw_ring_id = cpu_to_le32(ring_id); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) { + *prod = le32_to_cpu(resp->producer_index); + *cons = le32_to_cpu(resp->consumer_index); + } + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static void bnxt_dump_tx_sw_state(struct bnxt_napi *bnapi) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; @@ -8821,6 +8841,11 @@ static void bnxt_timer(struct timer_list *t) bnxt_queue_sp_work(bp); } } + + if ((bp->flags & BNXT_FLAG_CHIP_P5) && netif_carrier_ok(dev)) { + set_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event); + bnxt_queue_sp_work(bp); + } bnxt_restart_timer: mod_timer(&bp->timer, jiffies + bp->current_interval); } @@ -8851,6 +8876,43 @@ static void bnxt_reset(struct bnxt *bp, bool silent) bnxt_rtnl_unlock_sp(bp); } +static void bnxt_chk_missed_irq(struct bnxt *bp) +{ + int i; + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + return; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + struct bnxt_cp_ring_info *cpr; + u32 fw_ring_id; + int j; + + if (!bnapi) + continue; + + cpr = &bnapi->cp_ring; + for (j = 0; j < 2; j++) { + struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j]; + u32 val[2]; + + if (!cpr2 || cpr2->has_more_work || + !bnxt_has_work(bp, cpr2)) + continue; + + if (cpr2->cp_raw_cons != cpr2->last_cp_raw_cons) { + cpr2->last_cp_raw_cons = cpr2->cp_raw_cons; + continue; + } + fw_ring_id = cpr2->cp_ring_struct.fw_ring_id; + bnxt_dbg_hwrm_ring_info_get(bp, + DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL, + fw_ring_id, &val[0], &val[1]); + } + } +} + static void bnxt_cfg_ntp_filters(struct bnxt *); static void bnxt_sp_task(struct work_struct *work) @@ -8930,6 +8992,9 @@ static void bnxt_sp_task(struct work_struct *work) if (test_and_clear_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event)) bnxt_tc_flow_stats_work(bp); + if (test_and_clear_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event)) + bnxt_chk_missed_irq(bp); + /* These functions below will clear BNXT_STATE_IN_SP_TASK. They * must be the last functions to be called before exiting. */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 498b373c992d..00bd17e55e99 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -798,6 +798,8 @@ struct bnxt_cp_ring_info { u8 had_work_done:1; u8 has_more_work:1; + u32 last_cp_raw_cons; + struct bnxt_coal rx_ring_coal; u64 rx_packets; u64 rx_bytes; @@ -1527,6 +1529,7 @@ struct bnxt { #define BNXT_LINK_SPEED_CHNG_SP_EVENT 14 #define BNXT_FLOW_STATS_SP_EVENT 15 #define BNXT_UPDATE_PHY_SP_EVENT 16 +#define BNXT_RING_COAL_NOW_SP_EVENT 17 struct bnxt_hw_resc hw_resc; struct bnxt_pf_info pf; -- cgit v1.2.3-59-g8ed1b From 83eb5c5cff32681f3769f502cb5589c7d7509bfe Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 15 Nov 2018 03:25:41 -0500 Subject: bnxt_en: Add software "missed_irqs" counter. To keep track of the number of times the workaround code for 57500 A0 has been triggered. This is a per NQ counter. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 ++++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5d4147a75cad..d4c300117529 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8909,6 +8909,7 @@ static void bnxt_chk_missed_irq(struct bnxt *bp) bnxt_dbg_hwrm_ring_info_get(bp, DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL, fw_ring_id, &val[0], &val[1]); + cpr->missed_irqs++; } } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 00bd17e55e99..9e99d4ab3e06 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -818,6 +818,7 @@ struct bnxt_cp_ring_info { dma_addr_t hw_stats_map; u32 hw_stats_ctx_id; u64 rx_l4_csum_errors; + u64 missed_irqs; struct bnxt_ring_struct cp_ring_struct; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 48078564f025..4b734cd81f8b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -137,7 +137,7 @@ reset_coalesce: return rc; } -#define BNXT_NUM_STATS 21 +#define BNXT_NUM_STATS 22 #define BNXT_RX_STATS_ENTRY(counter) \ { BNXT_RX_STATS_OFFSET(counter), __stringify(counter) } @@ -384,6 +384,7 @@ static void bnxt_get_ethtool_stats(struct net_device *dev, for (k = 0; k < stat_fields; j++, k++) buf[j] = le64_to_cpu(hw_stats[k]); buf[j++] = cpr->rx_l4_csum_errors; + buf[j++] = cpr->missed_irqs; bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter += le64_to_cpu(cpr->hw_stats->rx_discard_pkts); @@ -468,6 +469,8 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf) buf += ETH_GSTRING_LEN; sprintf(buf, "[%d]: rx_l4_csum_errors", i); buf += ETH_GSTRING_LEN; + sprintf(buf, "[%d]: missed_irqs", i); + buf += ETH_GSTRING_LEN; } for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) { strcpy(buf, bnxt_sw_func_stats[i].string); -- cgit v1.2.3-59-g8ed1b From 8dc5ae2d48976764cf3498e97963fa06befefb0e Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 15 Nov 2018 03:25:42 -0500 Subject: bnxt_en: Fix filling time in bnxt_fill_coredump_record() Fix the year and month offset while storing it in bnxt_fill_coredump_record(). Fixes: 6c5657d085ae ("bnxt_en: Add support for ethtool get dump.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 4b734cd81f8b..6cc69a58478a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2945,8 +2945,8 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record, record->asic_state = 0; strlcpy(record->system_name, utsname()->nodename, sizeof(record->system_name)); - record->year = cpu_to_le16(tm.tm_year); - record->month = cpu_to_le16(tm.tm_mon); + record->year = cpu_to_le16(tm.tm_year + 1900); + record->month = cpu_to_le16(tm.tm_mon + 1); record->day = cpu_to_le16(tm.tm_mday); record->hour = cpu_to_le16(tm.tm_hour); record->minute = cpu_to_le16(tm.tm_min); -- cgit v1.2.3-59-g8ed1b From b8c3c10cf68d7556466bb7d99f249ed586ddfbe3 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Mon, 12 Nov 2018 11:50:56 -0600 Subject: MAINTAINERS: Replace Vince Bridgers as Altera TSE maintainer Vince has moved to a different role. Replace him as Altera TSE maintainer. Signed-off-by: Thor Thayer Acked-by: Vince Bridgers Acked-by: Alan Tull Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0abecc528dac..5a4bd37d9d02 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -717,7 +717,7 @@ F: include/linux/mfd/altera-a10sr.h F: include/dt-bindings/reset/altr,rst-mgr-a10sr.h ALTERA TRIPLE SPEED ETHERNET DRIVER -M: Vince Bridgers +M: Thor Thayer L: netdev@vger.kernel.org L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) S: Maintained -- cgit v1.2.3-59-g8ed1b From ebcd210e93b2a984b7a7b82d45f7f0d21b7ec2d2 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Thu, 15 Nov 2018 15:36:21 +0530 Subject: cxgb4: fix thermal zone build error with CONFIG_THERMAL=m and cxgb4 as built-in build fails, and 'commit e70a57fa59bb ("cxgb4: fix thermal configuration dependencies")' tries to fix it but when cxgb4i is made built-in build fails again, use IS_REACHABLE instead of IS_ENABLED to fix the issue. Fixes: e70a57fa59bb (cxgb4: fix thermal configuration dependencies) Reported-by: Randy Dunlap Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/Kconfig | 1 - drivers/net/ethernet/chelsio/cxgb4/Makefile | 4 +--- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index 75c1c5ed2387..e2cdfa75673f 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -67,7 +67,6 @@ config CHELSIO_T3 config CHELSIO_T4 tristate "Chelsio Communications T4/T5/T6 Ethernet support" depends on PCI && (IPV6 || IPV6=n) - depends on THERMAL || !THERMAL select FW_LOADER select MDIO select ZLIB_DEFLATE diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 78e5d17a1d5f..91d8a885deba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -12,6 +12,4 @@ cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o -ifdef CONFIG_THERMAL -cxgb4-objs += cxgb4_thermal.o -endif +cxgb4-$(CONFIG_THERMAL) += cxgb4_thermal.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 05a46926016a..d49db46254cd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5863,7 +5863,7 @@ fw_attach_fail: if (!is_t4(adapter->params.chip)) cxgb4_ptp_init(adapter); - if (IS_ENABLED(CONFIG_THERMAL) && + if (IS_REACHABLE(CONFIG_THERMAL) && !is_t4(adapter->params.chip) && (adapter->flags & FW_OK)) cxgb4_thermal_init(adapter); @@ -5932,7 +5932,7 @@ static void remove_one(struct pci_dev *pdev) if (!is_t4(adapter->params.chip)) cxgb4_ptp_stop(adapter); - if (IS_ENABLED(CONFIG_THERMAL)) + if (IS_REACHABLE(CONFIG_THERMAL)) cxgb4_thermal_remove(adapter); /* If we allocated filters, free up state associated with any -- cgit v1.2.3-59-g8ed1b From 7150ceaacb27f7b3bf494e72cd4be4e11612dfff Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 12 Nov 2018 22:33:22 +0000 Subject: rxrpc: Fix life check The life-checking function, which is used by kAFS to make sure that a call is still live in the event of a pending signal, only samples the received packet serial number counter; it doesn't actually provoke a change in the counter, rather relying on the server to happen to give us a packet in the time window. Fix this by adding a function to force a ping to be transmitted. kAFS then keeps track of whether there's been a stall, and if so, uses the new function to ping the server, resetting the timeout to allow the reply to come back. If there's a stall, a ping and the call is *still* stalled in the same place after another period, then the call will be aborted. Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals") Fixes: f4d15fb6f99a ("rxrpc: Provide functions for allowing cleaner handling of signals") Signed-off-by: David Howells Signed-off-by: David S. Miller --- Documentation/networking/rxrpc.txt | 17 +++++++++++------ fs/afs/rxrpc.c | 11 ++++++++++- include/net/af_rxrpc.h | 3 ++- include/trace/events/rxrpc.h | 2 ++ net/rxrpc/af_rxrpc.c | 27 +++++++++++++++++++++++---- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 605e00cdd6be..89f1302d593a 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -1056,18 +1056,23 @@ The kernel interface functions are as follows: u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call); + void rxrpc_kernel_probe_life(struct socket *sock, + struct rxrpc_call *call); - This returns a number that is updated when ACKs are received from the peer - (notably including PING RESPONSE ACKs which we can elicit by sending PING - ACKs to see if the call still exists on the server). The caller should - compare the numbers of two calls to see if the call is still alive after - waiting for a suitable interval. + The first function returns a number that is updated when ACKs are received + from the peer (notably including PING RESPONSE ACKs which we can elicit by + sending PING ACKs to see if the call still exists on the server). The + caller should compare the numbers of two calls to see if the call is still + alive after waiting for a suitable interval. This allows the caller to work out if the server is still contactable and if the call is still alive on the server whilst waiting for the server to process a client operation. - This function may transmit a PING ACK. + The second function causes a ping ACK to be transmitted to try to provoke + the peer into responding, which would then cause the value returned by the + first function to change. Note that this must be called in TASK_RUNNING + state. (*) Get reply timestamp. diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 59970886690f..a7b44863d502 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -576,6 +576,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, { signed long rtt2, timeout; long ret; + bool stalled = false; u64 rtt; u32 life, last_life; @@ -609,12 +610,20 @@ static long afs_wait_for_call_to_complete(struct afs_call *call, life = rxrpc_kernel_check_life(call->net->socket, call->rxcall); if (timeout == 0 && - life == last_life && signal_pending(current)) + life == last_life && signal_pending(current)) { + if (stalled) break; + __set_current_state(TASK_RUNNING); + rxrpc_kernel_probe_life(call->net->socket, call->rxcall); + timeout = rtt2; + stalled = true; + continue; + } if (life != last_life) { timeout = rtt2; last_life = life; + stalled = false; } timeout = schedule_timeout(timeout); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index de587948042a..1adefe42c0a6 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -77,7 +77,8 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *, struct key *); int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *, enum rxrpc_call_completion *, u32 *); -u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *); +u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); +void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 573d5b901fb1..5b50fe4906d2 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -181,6 +181,7 @@ enum rxrpc_timer_trace { enum rxrpc_propose_ack_trace { rxrpc_propose_ack_client_tx_end, rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_check_life, rxrpc_propose_ack_ping_for_keepalive, rxrpc_propose_ack_ping_for_lost_ack, rxrpc_propose_ack_ping_for_lost_reply, @@ -380,6 +381,7 @@ enum rxrpc_tx_point { #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ + EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \ EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 64362d078da8..a2522f9d71e2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -375,16 +375,35 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * getting ACKs from the server. Returns a number representing the life state * which can be compared to that returned by a previous call. * - * If this is a client call, ping ACKs will be sent to the server to find out - * whether it's still responsive and whether the call is still alive on the - * server. + * If the life state stalls, rxrpc_kernel_probe_life() should be called and + * then 2RTT waited. */ -u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call) +u32 rxrpc_kernel_check_life(const struct socket *sock, + const struct rxrpc_call *call) { return call->acks_latest; } EXPORT_SYMBOL(rxrpc_kernel_check_life); +/** + * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive + * @sock: The socket the call is on + * @call: The call to check + * + * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to + * find out whether a call is still alive by pinging it. This should cause the + * life state to be bumped in about 2*RTT. + * + * The must be called in TASK_RUNNING state on pain of might_sleep() objecting. + */ +void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call) +{ + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_check_life); + rxrpc_send_ack_packet(call, true, NULL); +} +EXPORT_SYMBOL(rxrpc_kernel_probe_life); + /** * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call. * @sock: The socket the call is on -- cgit v1.2.3-59-g8ed1b From 08e14fe429a07475ee9f29a283945d602e4a6d92 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Nov 2018 16:17:16 -0800 Subject: net_sched: sch_fq: ensure maxrate fq parameter applies to EDT flows When EDT conversion happened, fq lost the ability to enfore a maxrate for all flows. It kept it for non EDT flows. This commit restores the functionality. Tested: tc qd replace dev eth0 root fq maxrate 500Mbit netperf -P0 -H host -- -O THROUGHPUT 489.75 Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 4b1af706896c..25a7cf6d380f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -469,22 +469,29 @@ begin: goto begin; } prefetch(&skb->end); - f->credit -= qdisc_pkt_len(skb); + plen = qdisc_pkt_len(skb); + f->credit -= plen; - if (ktime_to_ns(skb->tstamp) || !q->rate_enable) + if (!q->rate_enable) goto out; rate = q->flow_max_rate; - if (skb->sk) - rate = min(skb->sk->sk_pacing_rate, rate); - - if (rate <= q->low_rate_threshold) { - f->credit = 0; - plen = qdisc_pkt_len(skb); - } else { - plen = max(qdisc_pkt_len(skb), q->quantum); - if (f->credit > 0) - goto out; + + /* If EDT time was provided for this skb, we need to + * update f->time_next_packet only if this qdisc enforces + * a flow max rate. + */ + if (!skb->tstamp) { + if (skb->sk) + rate = min(skb->sk->sk_pacing_rate, rate); + + if (rate <= q->low_rate_threshold) { + f->credit = 0; + } else { + plen = max(plen, q->quantum); + if (f->credit > 0) + goto out; + } } if (rate != ~0UL) { u64 len = (u64)plen * NSEC_PER_SEC; -- cgit v1.2.3-59-g8ed1b From bd85fbc2038a1bbe84990b23ff69b6fc81a32b2c Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 15 Nov 2018 18:05:13 +0200 Subject: net/mlx4_core: Zero out lkey field in SW2HW_MPT fw command When re-registering a user mr, the mpt information for the existing mr when running SRIOV is obtained via the QUERY_MPT fw command. The returned information includes the mpt's lkey. This retrieved mpt information is used to move the mpt back to hardware ownership in the rereg flow (via the SW2HW_MPT fw command when running SRIOV). The fw API spec states that for SW2HW_MPT, the lkey field must be zero. Any ConnectX-3 PF driver which checks for strict spec adherence will return failure for SW2HW_MPT if the lkey field is not zero (although the fw in practice ignores this field for SW2HW_MPT). Thus, in order to conform to the fw API spec, set the lkey field to zero before invoking SW2HW_MPT when running SRIOV. Fixes: e630664c8383 ("mlx4_core: Add helper functions to support MR re-registration") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 2e84f10f59ba..1a11bc0e1612 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -363,6 +363,7 @@ int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, buf); + (*mpt_entry)->lkey = 0; err = mlx4_SW2HW_MPT(dev, mailbox, key); } -- cgit v1.2.3-59-g8ed1b From 3ea7e7ea53c9f6ee41cb69a29c375fe9dd9a56a7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 15 Nov 2018 18:05:14 +0200 Subject: net/mlx4_core: Fix uninitialized variable compilation warning Initialize the uid variable to zero to avoid the compilation warning. Fixes: 7a89399ffad7 ("net/mlx4: Add mlx4_bitmap zone allocator") Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index deef5a998985..9af34e03892c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -337,7 +337,7 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc) static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count, int align, u32 skip_mask, u32 *puid) { - u32 uid; + u32 uid = 0; u32 res; struct mlx4_zone_allocator *zone_alloc = zone->allocator; struct mlx4_zone_entry *curr_node; -- cgit v1.2.3-59-g8ed1b From a463146e67c848cbab5ce706d6528281b7cded08 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 15 Nov 2018 18:05:15 +0200 Subject: net/mlx4: Fix UBSAN warning of signed integer overflow UBSAN: Undefined behavior in drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:626:29 signed integer overflow: 1802201963 + 1802201963 cannot be represented in type 'int' The union of res_reserved and res_port_rsvd[MLX4_MAX_PORTS] monitors granting of reserved resources. The grant operation is calculated and protected, thus both members of the union cannot be negative. Changed type of res_reserved and of res_port_rsvd[MLX4_MAX_PORTS] from signed int to unsigned int, allowing large value. Fixes: 5a0d0a6161ae ("mlx4: Structures and init/teardown for VF resource quotas") Signed-off-by: Aya Levin Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index ebcd2778eeb3..23f1b5b512c2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -540,8 +540,8 @@ struct slave_list { struct resource_allocator { spinlock_t alloc_lock; /* protect quotas */ union { - int res_reserved; - int res_port_rsvd[MLX4_MAX_PORTS]; + unsigned int res_reserved; + unsigned int res_port_rsvd[MLX4_MAX_PORTS]; }; union { int res_free; -- cgit v1.2.3-59-g8ed1b From a97b9565338350d70d8d971c4ee6f0d4fa967418 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 15 Nov 2018 16:15:20 -0800 Subject: drivers/net/ethernet/qlogic/qed/qed_rdma.h: fix typo Add missing semicolon. Fixes: 291d57f67d244973 ("qed: Fix rdma_info structure allocation") Cc: Michal Kalderon Cc: Denis Bolotin Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h index 50d609c0e108..3689fe3e5935 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h @@ -183,7 +183,7 @@ void qed_rdma_info_free(struct qed_hwfn *p_hwfn); static inline void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} -static inline int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn) {return -EINVAL} +static inline int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn) {return -EINVAL;} static inline void qed_rdma_info_free(struct qed_hwfn *p_hwfn) {} #endif -- cgit v1.2.3-59-g8ed1b From c26b5aa8ef0d46035060fded475e6ab957b9f69f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 11 Nov 2018 11:15:21 +0000 Subject: gfs2: Fix iomap buffer head reference counting bug GFS2 passes the inode buffer head (dibh) from gfs2_iomap_begin to gfs2_iomap_end in iomap->private. It sets that private pointer in gfs2_iomap_get. Users of gfs2_iomap_get other than gfs2_iomap_begin would have to release iomap->private, but this isn't done correctly, leading to a leak of buffer head references. To fix this, move the code for setting iomap->private from gfs2_iomap_get to gfs2_iomap_begin. Fixes: 64bc06bb32 ("gfs2: iomap buffered write support") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Andreas Gruenbacher Signed-off-by: Linus Torvalds --- fs/gfs2/bmap.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 38d88fcb6988..0d643306c255 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -826,7 +826,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, ret = gfs2_meta_inode_buffer(ip, &dibh); if (ret) goto unlock; - iomap->private = dibh; + mp->mp_bh[0] = dibh; if (gfs2_is_stuffed(ip)) { if (flags & IOMAP_WRITE) { @@ -863,9 +863,6 @@ unstuff: len = lblock_stop - lblock + 1; iomap->length = len << inode->i_blkbits; - get_bh(dibh); - mp->mp_bh[0] = dibh; - height = ip->i_height; while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height]) height++; @@ -898,8 +895,6 @@ out: iomap->bdev = inode->i_sb->s_bdev; unlock: up_read(&ip->i_rw_mutex); - if (ret && dibh) - brelse(dibh); return ret; do_alloc: @@ -980,9 +975,9 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, loff_t length, unsigned flags, - struct iomap *iomap) + struct iomap *iomap, + struct metapath *mp) { - struct metapath mp = { .mp_aheight = 1, }; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; @@ -996,9 +991,9 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, unstuff = gfs2_is_stuffed(ip) && pos + length > gfs2_max_stuffed_size(ip); - ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); + ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp); if (ret) - goto out_release; + goto out_unlock; alloc_required = unstuff || iomap->type == IOMAP_HOLE; @@ -1013,7 +1008,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, ret = gfs2_quota_lock_check(ip, &ap); if (ret) - goto out_release; + goto out_unlock; ret = gfs2_inplace_reserve(ip, &ap); if (ret) @@ -1038,17 +1033,15 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, ret = gfs2_unstuff_dinode(ip, NULL); if (ret) goto out_trans_end; - release_metapath(&mp); - brelse(iomap->private); - iomap->private = NULL; + release_metapath(mp); ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, - flags, iomap, &mp); + flags, iomap, mp); if (ret) goto out_trans_end; } if (iomap->type == IOMAP_HOLE) { - ret = gfs2_iomap_alloc(inode, iomap, flags, &mp); + ret = gfs2_iomap_alloc(inode, iomap, flags, mp); if (ret) { gfs2_trans_end(sdp); gfs2_inplace_release(ip); @@ -1056,7 +1049,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, goto out_qunlock; } } - release_metapath(&mp); if (gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; return 0; @@ -1069,10 +1061,7 @@ out_trans_fail: out_qunlock: if (alloc_required) gfs2_quota_unlock(ip); -out_release: - if (iomap->private) - brelse(iomap->private); - release_metapath(&mp); +out_unlock: gfs2_write_unlock(inode); return ret; } @@ -1088,10 +1077,10 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, trace_gfs2_iomap_start(ip, pos, length, flags); if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { - ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); + ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp); } else { ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); - release_metapath(&mp); + /* * Silently fall back to buffered I/O for stuffed files or if * we've hot a hole (see gfs2_file_direct_write). @@ -1100,6 +1089,11 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, iomap->type != IOMAP_MAPPED) ret = -ENOTBLK; } + if (!ret) { + get_bh(mp.mp_bh[0]); + iomap->private = mp.mp_bh[0]; + } + release_metapath(&mp); trace_gfs2_iomap_end(ip, iomap, ret); return ret; } -- cgit v1.2.3-59-g8ed1b From 83e65df6dfece9eb588735459428f221eb930c0c Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 9 Nov 2018 09:17:33 +0100 Subject: net: mvneta: Don't advertise 2.5G modes Using 2.5G speed relies on the SerDes lanes being configured accordingly. The lanes have to be reconfigured to switch between 1G and 2.5G, and for now only the bootloader does this configuration. In the case we add a Comphy driver to handle switching the lanes dynamically, it's better for now to stick with supporting only 1G and add advertisement for 2.5G once we really are capable of handling both speeds without problem. Since the interface mode is initialy taken from the DT, we want to make sure that adding comphy support won't break boards that don't update their dtb. Fixes: da58a931f248 ("net: mvneta: Add support for 2500Mbps SGMII") Reported-by: Andrew Lunn Reported-by: Russell King Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 3ba672e9e353..e5397c8197b9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3343,7 +3343,6 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported, if (state->interface != PHY_INTERFACE_MODE_NA && state->interface != PHY_INTERFACE_MODE_QSGMII && state->interface != PHY_INTERFACE_MODE_SGMII && - state->interface != PHY_INTERFACE_MODE_2500BASEX && !phy_interface_mode_is_8023z(state->interface) && !phy_interface_mode_is_rgmii(state->interface)) { bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -3357,14 +3356,9 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported, /* Asymmetric pause is unsupported */ phylink_set(mask, Pause); - /* We cannot use 1Gbps when using the 2.5G interface. */ - if (state->interface == PHY_INTERFACE_MODE_2500BASEX) { - phylink_set(mask, 2500baseT_Full); - phylink_set(mask, 2500baseX_Full); - } else { - phylink_set(mask, 1000baseT_Full); - phylink_set(mask, 1000baseX_Full); - } + /* Half-duplex at speeds higher than 100Mbit is unsupported */ + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); if (!phy_interface_mode_is_8023z(state->interface)) { /* 10M and 100M are only supported in non-802.3z mode */ -- cgit v1.2.3-59-g8ed1b From 761f60261b4401aa368d71d431b4c218af0efcee Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 14 Nov 2018 00:48:28 +0800 Subject: ipv6: fix a dst leak when removing its exception These is no need to hold dst before calling rt6_remove_exception_rt(). The call to dst_hold_safe() in ip6_link_failure() was for ip6_del_rt(), which has been removed in Commit 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes"). Otherwise, it will cause a dst leak. This patch is to simply remove the dst_hold_safe() call before calling rt6_remove_exception_rt() and also do the same in ip6_del_cached_rt(). It's safe, because the removal of the exception that holds its dst's refcnt is protected by rt6_exception_lock. Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Fixes: 23fb93a4d3f1 ("net/ipv6: Cleanup exception and cache route handling") Reported-by: Li Shuang Signed-off-by: Xin Long Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 2a7423c39456..14b422f35504 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2232,8 +2232,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt) { rcu_read_lock(); if (rt->rt6i_flags & RTF_CACHE) { - if (dst_hold_safe(&rt->dst)) - rt6_remove_exception_rt(rt); + rt6_remove_exception_rt(rt); } else { struct fib6_info *from; struct fib6_node *fn; @@ -3214,8 +3213,8 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) if (cfg->fc_flags & RTF_GATEWAY && !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) goto out; - if (dst_hold_safe(&rt->dst)) - rc = rt6_remove_exception_rt(rt); + + rc = rt6_remove_exception_rt(rt); out: return rc; } -- cgit v1.2.3-59-g8ed1b From 06bc4d0079ab6a2de86f56703ce1bd13e90b9d9d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 13 Nov 2018 18:42:24 +0100 Subject: net: lantiq: Fix returned value in case of error in 'xrx200_probe()' Return 'err' in the error handling path instead of 0. Return explicitly 0 in the normal path, instead of 'err', which is known to be 0 at this point. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 8c5ba4b81fb7..2d4d10a017e5 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -512,7 +512,8 @@ static int xrx200_probe(struct platform_device *pdev) err = register_netdev(net_dev); if (err) goto err_unprepare_clk; - return err; + + return 0; err_unprepare_clk: clk_disable_unprepare(priv->clk); @@ -520,7 +521,7 @@ err_unprepare_clk: err_uninit_dma: xrx200_hw_cleanup(priv); - return 0; + return err; } static int xrx200_remove(struct platform_device *pdev) -- cgit v1.2.3-59-g8ed1b From 19ab69107d3ecfb7cd3e38ad262a881be40c01a3 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 14 Nov 2018 12:17:25 +0100 Subject: net/sched: act_pedit: fix memory leak when IDR allocation fails tcf_idr_check_alloc() can return a negative value, on allocation failures (-ENOMEM) or IDR exhaustion (-ENOSPC): don't leak keys_ex in these cases. Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index da3dd0f68cc2..2b372a06b432 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -201,7 +201,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, goto out_release; } } else { - return err; + ret = err; + goto out_free; } p = to_pedit(*a); -- cgit v1.2.3-59-g8ed1b From dfa0d55ff6be64e7b6881212a291cb95f8da3b08 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Wed, 14 Nov 2018 12:54:49 +0100 Subject: net: phy: mdio-gpio: Fix working over slow can_sleep GPIOs This commit re-enables support for slow GPIO pins. It was initially introduced by commit 2d6c9091ab76 ("net: mdio-gpio: support access that may sleep") and got lost by commit 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible"). Also add a warning about slow GPIO pins like it is done in i2c-gpio. Signed-off-by: Martin Schiller Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 33265747bf39..3a5a24daf384 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -63,7 +63,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) * assume the pin serves as pull-up. If direction is * output, the default value is high. */ - gpiod_set_value(bitbang->mdo, 1); + gpiod_set_value_cansleep(bitbang->mdo, 1); return; } @@ -78,7 +78,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - return gpiod_get_value(bitbang->mdio); + return gpiod_get_value_cansleep(bitbang->mdio); } static void mdio_set(struct mdiobb_ctrl *ctrl, int what) @@ -87,9 +87,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) container_of(ctrl, struct mdio_gpio_info, ctrl); if (bitbang->mdo) - gpiod_set_value(bitbang->mdo, what); + gpiod_set_value_cansleep(bitbang->mdo, what); else - gpiod_set_value(bitbang->mdio, what); + gpiod_set_value_cansleep(bitbang->mdio, what); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -97,7 +97,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpiod_set_value(bitbang->mdc, what); + gpiod_set_value_cansleep(bitbang->mdc, what); } static const struct mdiobb_ops mdio_gpio_ops = { @@ -162,6 +162,10 @@ static int mdio_gpio_probe(struct platform_device *pdev) if (ret) return ret; + if (gpiod_cansleep(bitbang->mdc) || gpiod_cansleep(bitbang->mdio) || + gpiod_cansleep(bitbang->mdo)) + dev_warn(&pdev->dev, "Slow GPIO pins might wreak havoc into MDIO bus timing"); + if (pdev->dev.of_node) { bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio"); if (bus_id < 0) { -- cgit v1.2.3-59-g8ed1b From 160396a722e0c4dfd462f3eec779251bf944f438 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 16 Nov 2018 23:04:37 -0800 Subject: Revert "net: phy: mdio-gpio: Fix working over slow can_sleep GPIOs" This reverts commit dfa0d55ff6be64e7b6881212a291cb95f8da3b08. Discussion still ongoing, I shouldn't have applied this. Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 3a5a24daf384..33265747bf39 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -63,7 +63,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) * assume the pin serves as pull-up. If direction is * output, the default value is high. */ - gpiod_set_value_cansleep(bitbang->mdo, 1); + gpiod_set_value(bitbang->mdo, 1); return; } @@ -78,7 +78,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - return gpiod_get_value_cansleep(bitbang->mdio); + return gpiod_get_value(bitbang->mdio); } static void mdio_set(struct mdiobb_ctrl *ctrl, int what) @@ -87,9 +87,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) container_of(ctrl, struct mdio_gpio_info, ctrl); if (bitbang->mdo) - gpiod_set_value_cansleep(bitbang->mdo, what); + gpiod_set_value(bitbang->mdo, what); else - gpiod_set_value_cansleep(bitbang->mdio, what); + gpiod_set_value(bitbang->mdio, what); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -97,7 +97,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpiod_set_value_cansleep(bitbang->mdc, what); + gpiod_set_value(bitbang->mdc, what); } static const struct mdiobb_ops mdio_gpio_ops = { @@ -162,10 +162,6 @@ static int mdio_gpio_probe(struct platform_device *pdev) if (ret) return ret; - if (gpiod_cansleep(bitbang->mdc) || gpiod_cansleep(bitbang->mdio) || - gpiod_cansleep(bitbang->mdo)) - dev_warn(&pdev->dev, "Slow GPIO pins might wreak havoc into MDIO bus timing"); - if (pdev->dev.of_node) { bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio"); if (bus_id < 0) { -- cgit v1.2.3-59-g8ed1b From df5a8ec64eed7fe45b556cfff503acd6429ab817 Mon Sep 17 00:00:00 2001 From: Martin Schiller Date: Fri, 16 Nov 2018 08:38:36 +0100 Subject: net: phy: mdio-gpio: Fix working over slow can_sleep GPIOs Up until commit 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible"), the _cansleep variants of the gpio_ API was used. After that commit and the change to gpiod_ API, the _cansleep() was dropped. This then results in WARN_ON() when used with GPIO devices which do sleep. Add back the _cansleep() to avoid this. Fixes: 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible") Signed-off-by: Martin Schiller Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio-gpio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 33265747bf39..0fbcedcdf6e2 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -63,7 +63,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) * assume the pin serves as pull-up. If direction is * output, the default value is high. */ - gpiod_set_value(bitbang->mdo, 1); + gpiod_set_value_cansleep(bitbang->mdo, 1); return; } @@ -78,7 +78,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - return gpiod_get_value(bitbang->mdio); + return gpiod_get_value_cansleep(bitbang->mdio); } static void mdio_set(struct mdiobb_ctrl *ctrl, int what) @@ -87,9 +87,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what) container_of(ctrl, struct mdio_gpio_info, ctrl); if (bitbang->mdo) - gpiod_set_value(bitbang->mdo, what); + gpiod_set_value_cansleep(bitbang->mdo, what); else - gpiod_set_value(bitbang->mdio, what); + gpiod_set_value_cansleep(bitbang->mdio, what); } static void mdc_set(struct mdiobb_ctrl *ctrl, int what) @@ -97,7 +97,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what) struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); - gpiod_set_value(bitbang->mdc, what); + gpiod_set_value_cansleep(bitbang->mdc, what); } static const struct mdiobb_ops mdio_gpio_ops = { -- cgit v1.2.3-59-g8ed1b From 95506588d2c1d72ca29adef8ae9bf771bcfb4ced Mon Sep 17 00:00:00 2001 From: Slavomir Kaslev Date: Fri, 16 Nov 2018 11:27:53 +0200 Subject: socket: do a generic_file_splice_read when proto_ops has no splice_read splice(2) fails with -EINVAL when called reading on a socket with no splice_read set in its proto_ops (such as vsock sockets). Switch this to fallbacks to a generic_file_splice_read instead. Signed-off-by: Slavomir Kaslev Signed-off-by: David S. Miller --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/socket.c b/net/socket.c index 593826e11a53..334fcc617ef2 100644 --- a/net/socket.c +++ b/net/socket.c @@ -853,7 +853,7 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, struct socket *sock = file->private_data; if (unlikely(!sock->ops->splice_read)) - return -EINVAL; + return generic_file_splice_read(file, ppos, pipe, len, flags); return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -- cgit v1.2.3-59-g8ed1b From 9d332e69c1dc74dcd748de7cbd2dac5c61bda265 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 16 Nov 2018 18:50:01 +0200 Subject: net: bridge: fix vlan stats use-after-free on destruction Syzbot reported a use-after-free of the global vlan context on port vlan destruction. When I added per-port vlan stats I missed the fact that the global vlan context can be freed before the per-port vlan rcu callback. There're a few different ways to deal with this, I've chosen to add a new private flag that is set only when per-port stats are allocated so we can directly check it on destruction without dereferencing the global context at all. The new field in net_bridge_vlan uses a hole. v2: cosmetic change, move the check to br_process_vlan_info where the other checks are done v3: add change log in the patch, add private (in-kernel only) flags in a hole in net_bridge_vlan struct and use that instead of mixing user-space flags with private flags Fixes: 9163a0fc1f0c ("net: bridge: add support for per-port vlan stats") Reported-by: syzbot+04681da557a0e49a52e5@syzkaller.appspotmail.com Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_private.h | 7 +++++++ net/bridge/br_vlan.c | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2920e06a5403..04c19a37e500 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -102,12 +102,18 @@ struct br_tunnel_info { struct metadata_dst *tunnel_dst; }; +/* private vlan flags */ +enum { + BR_VLFLAG_PER_PORT_STATS = BIT(0), +}; + /** * struct net_bridge_vlan - per-vlan entry * * @vnode: rhashtable member * @vid: VLAN id * @flags: bridge vlan flags + * @priv_flags: private (in-kernel) bridge vlan flags * @stats: per-cpu VLAN statistics * @br: if MASTER flag set, this points to a bridge struct * @port: if MASTER flag unset, this points to a port struct @@ -127,6 +133,7 @@ struct net_bridge_vlan { struct rhash_head tnode; u16 vid; u16 flags; + u16 priv_flags; struct br_vlan_stats __percpu *stats; union { struct net_bridge *br; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 8c9297a01947..e84be08b8285 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -197,7 +197,7 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu) v = container_of(rcu, struct net_bridge_vlan, rcu); WARN_ON(br_vlan_is_master(v)); /* if we had per-port stats configured then free them here */ - if (v->brvlan->stats != v->stats) + if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS) free_percpu(v->stats); v->stats = NULL; kfree(v); @@ -264,6 +264,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags) err = -ENOMEM; goto out_filt; } + v->priv_flags |= BR_VLFLAG_PER_PORT_STATS; } else { v->stats = masterv->stats; } -- cgit v1.2.3-59-g8ed1b From 8840c3e2344a456267d7989b97d097e798b28b0b Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Fri, 16 Nov 2018 12:13:59 -0800 Subject: MAINTAINERS: Add entry for CAKE qdisc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We would like the existing community to be kept in the loop for any new developments on CAKE; and I certainly plan to keep maintaining it. Reflect this in MAINTAINERS. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5a4bd37d9d02..99f2956be87b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3276,6 +3276,12 @@ F: include/uapi/linux/caif/ F: include/net/caif/ F: net/caif/ +CAKE QDISC +M: Toke Høiland-Jørgensen +L: cake@lists.bufferbloat.net (moderated for non-subscribers) +S: Maintained +F: net/sched/sch_cake.c + CALGARY x86-64 IOMMU M: Muli Ben-Yehuda M: Jon Mason -- cgit v1.2.3-59-g8ed1b From 16f7eb2b77b55da816c4e207f3f9440a8cafc00a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 16 Nov 2018 16:58:19 +0100 Subject: ip_tunnel: don't force DF when MTU is locked The various types of tunnels running over IPv4 can ask to set the DF bit to do PMTU discovery. However, PMTU discovery is subject to the threshold set by the net.ipv4.route.min_pmtu sysctl, and is also disabled on routes with "mtu lock". In those cases, we shouldn't set the DF bit. This patch makes setting the DF bit conditional on the route's MTU locking state. This issue seems to be older than git history. Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index dde671e97829..c248e0dccbe1 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; + iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df; iph->protocol = proto; iph->tos = tos; iph->daddr = dst; -- cgit v1.2.3-59-g8ed1b From 5aaf6428526bcad98d6f51f2f679c919bb75d7e9 Mon Sep 17 00:00:00 2001 From: Lucas Bates Date: Fri, 16 Nov 2018 17:37:55 -0500 Subject: tc-testing: tdc.py: ignore errors when decoding stdout/stderr Prevent exceptions from being raised while decoding output from an executed command. There is no impact on tdc's execution and the verify command phase would fail the pattern match. Signed-off-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 87a04a8a5945..9b3f414ff1e9 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -134,9 +134,9 @@ def exec_cmd(args, pm, stage, command): (rawout, serr) = proc.communicate() if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") + foutput = serr.decode("utf-8", errors="ignore") else: - foutput = rawout.decode("utf-8") + foutput = rawout.decode("utf-8", errors="ignore") proc.stdout.close() proc.stderr.close() -- cgit v1.2.3-59-g8ed1b From c6cecf4ae44e4ce9158ef8806358142c3512cd33 Mon Sep 17 00:00:00 2001 From: "Brenda J. Butler" Date: Fri, 16 Nov 2018 17:37:56 -0500 Subject: tc-testing: tdc.py: Guard against lack of returncode in executed command Add some defensive coding in case one of the subprocesses created by tdc returns nothing. If no object is returned from exec_cmd, then tdc will halt with an unhandled exception. Signed-off-by: Brenda J. Butler Signed-off-by: Lucas Bates Signed-off-by: David S. Miller --- tools/testing/selftests/tc-testing/tdc.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 9b3f414ff1e9..7607ba3e3cbe 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -169,6 +169,8 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None): file=sys.stderr) print("\n{} *** Error message: \"{}\"".format(prefix, foutput), file=sys.stderr) + print("returncode {}; expected {}".format(proc.returncode, + exit_codes)) print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr) print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr) print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr) @@ -195,12 +197,18 @@ def run_one_test(pm, args, index, tidx): print('-----> execute stage') pm.call_pre_execute() (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"]) - exit_code = p.returncode + if p: + exit_code = p.returncode + else: + exit_code = None + pm.call_post_execute() - if (exit_code != int(tidx["expExitCode"])): + if (exit_code is None or exit_code != int(tidx["expExitCode"])): result = False - print("exit:", exit_code, int(tidx["expExitCode"])) + print("exit: {!r}".format(exit_code)) + print("exit: {}".format(int(tidx["expExitCode"]))) + #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"]))) print(procout) else: if args.verbose > 0: -- cgit v1.2.3-59-g8ed1b From 33d9a2c72f086cbf1087b2fd2d1a15aa9df14a7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 17 Nov 2018 21:57:02 -0800 Subject: net-gro: reset skb->pkt_type in napi_reuse_skb() eth_type_trans() assumes initial value for skb->pkt_type is PACKET_HOST. This is indeed the value right after a fresh skb allocation. However, it is possible that GRO merged a packet with a different value (like PACKET_OTHERHOST in case macvlan is used), so we need to make sure napi->skb will have pkt_type set back to PACKET_HOST. Otherwise, valid packets might be dropped by the stack because their pkt_type is not PACKET_HOST. napi_reuse_skb() was added in commit 96e93eab2033 ("gro: Add internal interfaces for VLAN"), but this bug always has been there. Fixes: 96e93eab2033 ("gro: Add internal interfaces for VLAN") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 0ffcbdd55fa9..066aa902d85c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5655,6 +5655,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->vlan_tci = 0; skb->dev = napi->dev; skb->skb_iif = 0; + + /* eth_type_trans() assumes pkt_type is PACKET_HOST */ + skb->pkt_type = PACKET_HOST; + skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); -- cgit v1.2.3-59-g8ed1b From adba75be0d23cca92a028749d92c60c8909bbdb3 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 16 Nov 2018 16:55:04 -0500 Subject: tipc: fix lockdep warning when reinitilaizing sockets We get the following warning: [ 47.926140] 32-bit node address hash set to 2010a0a [ 47.927202] [ 47.927433] ================================ [ 47.928050] WARNING: inconsistent lock state [ 47.928661] 4.19.0+ #37 Tainted: G E [ 47.929346] -------------------------------- [ 47.929954] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 47.930116] swapper/3/0 [HC0[0]:SC1[3]:HE1:SE0] takes: [ 47.930116] 00000000af8bc31e (&(&ht->lock)->rlock){+.?.}, at: rhashtable_walk_enter+0x36/0xb0 [ 47.930116] {SOFTIRQ-ON-W} state was registered at: [ 47.930116] _raw_spin_lock+0x29/0x60 [ 47.930116] rht_deferred_worker+0x556/0x810 [ 47.930116] process_one_work+0x1f5/0x540 [ 47.930116] worker_thread+0x64/0x3e0 [ 47.930116] kthread+0x112/0x150 [ 47.930116] ret_from_fork+0x3a/0x50 [ 47.930116] irq event stamp: 14044 [ 47.930116] hardirqs last enabled at (14044): [] __local_bh_enable_ip+0x7a/0xf0 [ 47.938117] hardirqs last disabled at (14043): [] __local_bh_enable_ip+0x41/0xf0 [ 47.938117] softirqs last enabled at (14028): [] irq_enter+0x5e/0x60 [ 47.938117] softirqs last disabled at (14029): [] irq_exit+0xb5/0xc0 [ 47.938117] [ 47.938117] other info that might help us debug this: [ 47.938117] Possible unsafe locking scenario: [ 47.938117] [ 47.938117] CPU0 [ 47.938117] ---- [ 47.938117] lock(&(&ht->lock)->rlock); [ 47.938117] [ 47.938117] lock(&(&ht->lock)->rlock); [ 47.938117] [ 47.938117] *** DEADLOCK *** [ 47.938117] [ 47.938117] 2 locks held by swapper/3/0: [ 47.938117] #0: 0000000062c64f90 ((&d->timer)){+.-.}, at: call_timer_fn+0x5/0x280 [ 47.938117] #1: 00000000ee39619c (&(&d->lock)->rlock){+.-.}, at: tipc_disc_timeout+0xc8/0x540 [tipc] [ 47.938117] [ 47.938117] stack backtrace: [ 47.938117] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G E 4.19.0+ #37 [ 47.938117] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 47.938117] Call Trace: [ 47.938117] [ 47.938117] dump_stack+0x5e/0x8b [ 47.938117] print_usage_bug+0x1ed/0x1ff [ 47.938117] mark_lock+0x5b5/0x630 [ 47.938117] __lock_acquire+0x4c0/0x18f0 [ 47.938117] ? lock_acquire+0xa6/0x180 [ 47.938117] lock_acquire+0xa6/0x180 [ 47.938117] ? rhashtable_walk_enter+0x36/0xb0 [ 47.938117] _raw_spin_lock+0x29/0x60 [ 47.938117] ? rhashtable_walk_enter+0x36/0xb0 [ 47.938117] rhashtable_walk_enter+0x36/0xb0 [ 47.938117] tipc_sk_reinit+0xb0/0x410 [tipc] [ 47.938117] ? mark_held_locks+0x6f/0x90 [ 47.938117] ? __local_bh_enable_ip+0x7a/0xf0 [ 47.938117] ? lockdep_hardirqs_on+0x20/0x1a0 [ 47.938117] tipc_net_finalize+0xbf/0x180 [tipc] [ 47.938117] tipc_disc_timeout+0x509/0x540 [tipc] [ 47.938117] ? call_timer_fn+0x5/0x280 [ 47.938117] ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc] [ 47.938117] ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc] [ 47.938117] call_timer_fn+0xa1/0x280 [ 47.938117] ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc] [ 47.938117] run_timer_softirq+0x1f2/0x4d0 [ 47.938117] __do_softirq+0xfc/0x413 [ 47.938117] irq_exit+0xb5/0xc0 [ 47.938117] smp_apic_timer_interrupt+0xac/0x210 [ 47.938117] apic_timer_interrupt+0xf/0x20 [ 47.938117] [ 47.938117] RIP: 0010:default_idle+0x1c/0x140 [ 47.938117] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 41 54 55 53 65 8b 2d d8 2b 74 65 0f 1f 44 00 00 e8 c6 2c 8b ff fb f4 <65> 8b 2d c5 2b 74 65 0f 1f 44 00 00 5b 5d 41 5c c3 65 8b 05 b4 2b [ 47.938117] RSP: 0018:ffffaf6ac0207ec8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13 [ 47.938117] RAX: ffff8f5b3735e200 RBX: 0000000000000003 RCX: 0000000000000001 [ 47.938117] RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff8f5b3735e200 [ 47.938117] RBP: 0000000000000003 R08: 0000000000000001 R09: 0000000000000000 [ 47.938117] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 47.938117] R13: 0000000000000000 R14: ffff8f5b3735e200 R15: ffff8f5b3735e200 [ 47.938117] ? default_idle+0x1a/0x140 [ 47.938117] do_idle+0x1bc/0x280 [ 47.938117] cpu_startup_entry+0x19/0x20 [ 47.938117] start_secondary+0x187/0x1c0 [ 47.938117] secondary_startup_64+0xa4/0xb0 The reason seems to be that tipc_net_finalize()->tipc_sk_reinit() is calling the function rhashtable_walk_enter() within a timer interrupt. We fix this by executing tipc_net_finalize() in work queue context. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/discover.c | 19 ++++++++++--------- net/tipc/net.c | 45 +++++++++++++++++++++++++++++++++++++-------- net/tipc/net.h | 2 +- 3 files changed, 48 insertions(+), 18 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 2830709957bd..c138d68e8a69 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -166,7 +166,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d, /* Apply trial address if we just left trial period */ if (!trial && !self) { - tipc_net_finalize(net, tn->trial_addr); + tipc_sched_net_finalize(net, tn->trial_addr); + msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); } @@ -300,14 +301,12 @@ static void tipc_disc_timeout(struct timer_list *t) goto exit; } - /* Trial period over ? */ - if (!time_before(jiffies, tn->addr_trial_end)) { - /* Did we just leave it ? */ - if (!tipc_own_addr(net)) - tipc_net_finalize(net, tn->trial_addr); - - msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); - msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net)); + /* Did we just leave trial period ? */ + if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) { + mod_timer(&d->timer, jiffies + TIPC_DISC_INIT); + spin_unlock_bh(&d->lock); + tipc_sched_net_finalize(net, tn->trial_addr); + return; } /* Adjust timeout interval according to discovery phase */ @@ -319,6 +318,8 @@ static void tipc_disc_timeout(struct timer_list *t) d->timer_intv = TIPC_DISC_SLOW; else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST) d->timer_intv = TIPC_DISC_FAST; + msg_set_type(buf_msg(d->skb), DSC_REQ_MSG); + msg_set_prevnode(buf_msg(d->skb), tn->trial_addr); } mod_timer(&d->timer, jiffies + d->timer_intv); diff --git a/net/tipc/net.c b/net/tipc/net.c index 62199cf5a56c..f076edb74338 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -104,6 +104,14 @@ * - A local spin_lock protecting the queue of subscriber events. */ +struct tipc_net_work { + struct work_struct work; + struct net *net; + u32 addr; +}; + +static void tipc_net_finalize(struct net *net, u32 addr); + int tipc_net_init(struct net *net, u8 *node_id, u32 addr) { if (tipc_own_id(net)) { @@ -119,17 +127,38 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr) return 0; } -void tipc_net_finalize(struct net *net, u32 addr) +static void tipc_net_finalize(struct net *net, u32 addr) { struct tipc_net *tn = tipc_net(net); - if (!cmpxchg(&tn->node_addr, 0, addr)) { - tipc_set_node_addr(net, addr); - tipc_named_reinit(net); - tipc_sk_reinit(net); - tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, - TIPC_CLUSTER_SCOPE, 0, addr); - } + if (cmpxchg(&tn->node_addr, 0, addr)) + return; + tipc_set_node_addr(net, addr); + tipc_named_reinit(net); + tipc_sk_reinit(net); + tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr, + TIPC_CLUSTER_SCOPE, 0, addr); +} + +static void tipc_net_finalize_work(struct work_struct *work) +{ + struct tipc_net_work *fwork; + + fwork = container_of(work, struct tipc_net_work, work); + tipc_net_finalize(fwork->net, fwork->addr); + kfree(fwork); +} + +void tipc_sched_net_finalize(struct net *net, u32 addr) +{ + struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC); + + if (!fwork) + return; + INIT_WORK(&fwork->work, tipc_net_finalize_work); + fwork->net = net; + fwork->addr = addr; + schedule_work(&fwork->work); } void tipc_net_stop(struct net *net) diff --git a/net/tipc/net.h b/net/tipc/net.h index 09ad02b50bb1..b7f2e364eb99 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -42,7 +42,7 @@ extern const struct nla_policy tipc_nl_net_policy[]; int tipc_net_init(struct net *net, u8 *node_id, u32 addr); -void tipc_net_finalize(struct net *net, u32 addr); +void tipc_sched_net_finalize(struct net *net, u32 addr); void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3-59-g8ed1b From 1c1274a56999fbdf9cf84e332b28448bb2d55221 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Sat, 17 Nov 2018 12:17:06 -0500 Subject: tipc: don't assume linear buffer when reading ancillary data The code for reading ancillary data from a received buffer is assuming the buffer is linear. To make this assumption true we have to linearize the buffer before message data is read. Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 636e6131769d..b57b1be7252b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1555,16 +1555,17 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) /** * tipc_sk_anc_data_recv - optionally capture ancillary data for received message * @m: descriptor for message info - * @msg: received message header + * @skb: received message buffer * @tsk: TIPC port associated with message * * Note: Ancillary data is not captured if not requested by receiver. * * Returns 0 if successful, otherwise errno */ -static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, +static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, struct tipc_sock *tsk) { + struct tipc_msg *msg; u32 anc_data[3]; u32 err; u32 dest_type; @@ -1573,6 +1574,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, if (likely(m->msg_controllen == 0)) return 0; + msg = buf_msg(skb); /* Optionally capture errored message object(s) */ err = msg ? msg_errcode(msg) : 0; @@ -1583,6 +1585,9 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, if (res) return res; if (anc_data[1]) { + if (skb_linearize(skb)) + return -ENOMEM; + msg = buf_msg(skb); res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1], msg_data(msg)); if (res) @@ -1744,9 +1749,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Collect msg meta data, including error code and rejected data */ tipc_sk_set_orig_addr(m, skb); - rc = tipc_sk_anc_data_recv(m, hdr, tsk); + rc = tipc_sk_anc_data_recv(m, skb, tsk); if (unlikely(rc)) goto exit; + hdr = buf_msg(skb); /* Capture data if non-error msg, otherwise just set return value */ if (likely(!err)) { @@ -1856,9 +1862,10 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Collect msg meta data, incl. error code and rejected data */ if (!copied) { tipc_sk_set_orig_addr(m, skb); - rc = tipc_sk_anc_data_recv(m, hdr, tsk); + rc = tipc_sk_anc_data_recv(m, skb, tsk); if (rc) break; + hdr = buf_msg(skb); } /* Copy data if msg ok, otherwise return error/partial data */ -- cgit v1.2.3-59-g8ed1b From ca0246bb97c23da9d267c2107c07fb77e38205c9 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Fri, 16 Nov 2018 15:07:56 -0800 Subject: z3fold: fix possible reclaim races Reclaim and free can race on an object which is basically fine but in order for reclaim to be able to map "freed" object we need to encode object length in the handle. handle_to_chunks() is then introduced to extract object length from a handle and use it during mapping. Moreover, to avoid racing on a z3fold "headless" page release, we should not try to free that page in z3fold_free() if the reclaim bit is set. Also, in the unlikely case of trying to reclaim a page being freed, we should not proceed with that page. While at it, fix the page accounting in reclaim function. This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups". Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com Signed-off-by: Vitaly Wool Signed-off-by: Jongseok Kim Reported-by-by: Jongseok Kim Reviewed-by: Snild Dolkow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 101 +++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 39 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 4b366d181f35..aee9b0b8d907 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -99,6 +99,7 @@ struct z3fold_header { #define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT) #define BUDDY_MASK (0x3) +#define BUDDY_SHIFT 2 /** * struct z3fold_pool - stores metadata for each z3fold pool @@ -145,7 +146,7 @@ enum z3fold_page_flags { MIDDLE_CHUNK_MAPPED, NEEDS_COMPACTING, PAGE_STALE, - UNDER_RECLAIM + PAGE_CLAIMED, /* by either reclaim or free */ }; /***************** @@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); clear_bit(NEEDS_COMPACTING, &page->private); clear_bit(PAGE_STALE, &page->private); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); spin_lock_init(&zhdr->page_lock); kref_init(&zhdr->refcount); @@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) unsigned long handle; handle = (unsigned long)zhdr; - if (bud != HEADLESS) - handle += (bud + zhdr->first_num) & BUDDY_MASK; + if (bud != HEADLESS) { + handle |= (bud + zhdr->first_num) & BUDDY_MASK; + if (bud == LAST) + handle |= (zhdr->last_chunks << BUDDY_SHIFT); + } return handle; } @@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle) return (struct z3fold_header *)(handle & PAGE_MASK); } +/* only for LAST bud, returns zero otherwise */ +static unsigned short handle_to_chunks(unsigned long handle) +{ + return (handle & ~PAGE_MASK) >> BUDDY_SHIFT; +} + /* * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle * but that doesn't matter. because the masking will result in the @@ -720,37 +730,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) page = virt_to_page(zhdr); if (test_bit(PAGE_HEADLESS, &page->private)) { - /* HEADLESS page stored */ - bud = HEADLESS; - } else { - z3fold_page_lock(zhdr); - bud = handle_to_buddy(handle); - - switch (bud) { - case FIRST: - zhdr->first_chunks = 0; - break; - case MIDDLE: - zhdr->middle_chunks = 0; - zhdr->start_middle = 0; - break; - case LAST: - zhdr->last_chunks = 0; - break; - default: - pr_err("%s: unknown bud %d\n", __func__, bud); - WARN_ON(1); - z3fold_page_unlock(zhdr); - return; + /* if a headless page is under reclaim, just leave. + * NB: we use test_and_set_bit for a reason: if the bit + * has not been set before, we release this page + * immediately so we don't care about its value any more. + */ + if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) { + spin_lock(&pool->lock); + list_del(&page->lru); + spin_unlock(&pool->lock); + free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); } + return; } - if (bud == HEADLESS) { - spin_lock(&pool->lock); - list_del(&page->lru); - spin_unlock(&pool->lock); - free_z3fold_page(page); - atomic64_dec(&pool->pages_nr); + /* Non-headless case */ + z3fold_page_lock(zhdr); + bud = handle_to_buddy(handle); + + switch (bud) { + case FIRST: + zhdr->first_chunks = 0; + break; + case MIDDLE: + zhdr->middle_chunks = 0; + break; + case LAST: + zhdr->last_chunks = 0; + break; + default: + pr_err("%s: unknown bud %d\n", __func__, bud); + WARN_ON(1); + z3fold_page_unlock(zhdr); return; } @@ -758,7 +770,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } - if (test_bit(UNDER_RECLAIM, &page->private)) { + if (test_bit(PAGE_CLAIMED, &page->private)) { z3fold_page_unlock(zhdr); return; } @@ -836,20 +848,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) } list_for_each_prev(pos, &pool->lru) { page = list_entry(pos, struct page, lru); + + /* this bit could have been set by free, in which case + * we pass over to the next page in the pool. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; + + zhdr = page_address(page); if (test_bit(PAGE_HEADLESS, &page->private)) - /* candidate found */ break; - zhdr = page_address(page); - if (!z3fold_page_trylock(zhdr)) + if (!z3fold_page_trylock(zhdr)) { + zhdr = NULL; continue; /* can't evict at this point */ + } kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; - set_bit(UNDER_RECLAIM, &page->private); break; } + if (!zhdr) + break; + list_del_init(&page->lru); spin_unlock(&pool->lock); @@ -898,6 +920,7 @@ next: if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { free_z3fold_page(page); + atomic64_dec(&pool->pages_nr); return 0; } spin_lock(&pool->lock); @@ -905,7 +928,7 @@ next: spin_unlock(&pool->lock); } else { z3fold_page_lock(zhdr); - clear_bit(UNDER_RECLAIM, &page->private); + clear_bit(PAGE_CLAIMED, &page->private); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); @@ -964,7 +987,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle) set_bit(MIDDLE_CHUNK_MAPPED, &page->private); break; case LAST: - addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT); + addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT); break; default: pr_err("unknown buddy id %d\n", buddy); -- cgit v1.2.3-59-g8ed1b From 8fcb2312d1e3300e81aa871aad00d4c038cfc184 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 16 Nov 2018 15:08:00 -0800 Subject: kernel/sched/psi.c: simplify cgroup_move_task() The existing code triggered an invalid warning about 'rq' possibly being used uninitialized. Instead of doing the silly warning suppression by initializa it to NULL, refactor the code to bail out early instead. Warning was: kernel/sched/psi.c: In function `cgroup_move_task': kernel/sched/psi.c:639:13: warning: `rq' may be used uninitialized in this function [-Wmaybe-uninitialized] Link: http://lkml.kernel.org/r/20181103183339.8669-1-olof@lixom.net Fixes: 2ce7135adc9ad ("psi: cgroup support") Signed-off-by: Olof Johansson Reviewed-by: Andrew Morton Acked-by: Johannes Weiner Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/psi.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 7cdecfc010af..3d7355d7c3e3 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -633,38 +633,39 @@ void psi_cgroup_free(struct cgroup *cgroup) */ void cgroup_move_task(struct task_struct *task, struct css_set *to) { - bool move_psi = !psi_disabled; unsigned int task_flags = 0; struct rq_flags rf; struct rq *rq; - if (move_psi) { - rq = task_rq_lock(task, &rf); + if (psi_disabled) { + /* + * Lame to do this here, but the scheduler cannot be locked + * from the outside, so we move cgroups from inside sched/. + */ + rcu_assign_pointer(task->cgroups, to); + return; + } - if (task_on_rq_queued(task)) - task_flags = TSK_RUNNING; - else if (task->in_iowait) - task_flags = TSK_IOWAIT; + rq = task_rq_lock(task, &rf); - if (task->flags & PF_MEMSTALL) - task_flags |= TSK_MEMSTALL; + if (task_on_rq_queued(task)) + task_flags = TSK_RUNNING; + else if (task->in_iowait) + task_flags = TSK_IOWAIT; - if (task_flags) - psi_task_change(task, task_flags, 0); - } + if (task->flags & PF_MEMSTALL) + task_flags |= TSK_MEMSTALL; - /* - * Lame to do this here, but the scheduler cannot be locked - * from the outside, so we move cgroups from inside sched/. - */ + if (task_flags) + psi_task_change(task, task_flags, 0); + + /* See comment above */ rcu_assign_pointer(task->cgroups, to); - if (move_psi) { - if (task_flags) - psi_task_change(task, 0, task_flags); + if (task_flags) + psi_task_change(task, 0, task_flags); - task_rq_unlock(rq, task, &rf); - } + task_rq_unlock(rq, task, &rf); } #endif /* CONFIG_CGROUPS */ -- cgit v1.2.3-59-g8ed1b From 5e41540c8a0f0e98c337dda8b391e5dda0cde7cf Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 16 Nov 2018 15:08:04 -0800 Subject: hugetlbfs: fix kernel BUG at fs/hugetlbfs/inode.c:444! This bug has been experienced several times by the Oracle DB team. The BUG is in remove_inode_hugepages() as follows: /* * If page is mapped, it was faulted in after being * unmapped in caller. Unmap (again) now after taking * the fault mutex. The mutex will prevent faults * until we finish removing the page. * * This race can only happen in the hole punch case. * Getting here in a truncate operation is a bug. */ if (unlikely(page_mapped(page))) { BUG_ON(truncate_op); In this case, the elevated map count is not the result of a race. Rather it was incorrectly incremented as the result of a bug in the huge pmd sharing code. Consider the following: - Process A maps a hugetlbfs file of sufficient size and alignment (PUD_SIZE) that a pmd page could be shared. - Process B maps the same hugetlbfs file with the same size and alignment such that a pmd page is shared. - Process B then calls mprotect() to change protections for the mapping with the shared pmd. As a result, the pmd is 'unshared'. - Process B then calls mprotect() again to chage protections for the mapping back to their original value. pmd remains unshared. - Process B then forks and process C is created. During the fork process, we do dup_mm -> dup_mmap -> copy_page_range to copy page tables. Copying page tables for hugetlb mappings is done in the routine copy_hugetlb_page_range. In copy_hugetlb_page_range(), the destination pte is obtained by: dst_pte = huge_pte_alloc(dst, addr, sz); If pmd sharing is possible, the returned pointer will be to a pte in an existing page table. In the situation above, process C could share with either process A or process B. Since process A is first in the list, the returned pte is a pointer to a pte in process A's page table. However, the check for pmd sharing in copy_hugetlb_page_range is: /* If the pagetables are shared don't copy or take references */ if (dst_pte == src_pte) continue; Since process C is sharing with process A instead of process B, the above test fails. The code in copy_hugetlb_page_range which follows assumes dst_pte points to a huge_pte_none pte. It copies the pte entry from src_pte to dst_pte and increments this map count of the associated page. This is how we end up with an elevated map count. To solve, check the dst_pte entry for huge_pte_none. If !none, this implies PMD sharing so do not copy. Link: http://lkml.kernel.org/r/20181105212315.14125-1-mike.kravetz@oracle.com Fixes: c5c99429fa57 ("fix hugepages leak due to pagetable page sharing") Signed-off-by: Mike Kravetz Reviewed-by: Naoya Horiguchi Cc: Michal Hocko Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Cc: Davidlohr Bueso Cc: Prakash Sangappa Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c007fb5fb8d5..7f2a28ab46d5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3233,7 +3233,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte) int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma) { - pte_t *src_pte, *dst_pte, entry; + pte_t *src_pte, *dst_pte, entry, dst_entry; struct page *ptepage; unsigned long addr; int cow; @@ -3261,15 +3261,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, break; } - /* If the pagetables are shared don't copy or take references */ - if (dst_pte == src_pte) + /* + * If the pagetables are shared don't copy or take references. + * dst_pte == src_pte is the common case of src/dest sharing. + * + * However, src could have 'unshared' and dst shares with + * another vma. If dst_pte !none, this implies sharing. + * Check here before taking page table lock, and once again + * after taking the lock below. + */ + dst_entry = huge_ptep_get(dst_pte); + if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) continue; dst_ptl = huge_pte_lock(h, dst, dst_pte); src_ptl = huge_pte_lockptr(h, src, src_pte); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); entry = huge_ptep_get(src_pte); - if (huge_pte_none(entry)) { /* skip none entry */ + dst_entry = huge_ptep_get(dst_pte); + if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) { + /* + * Skip if src entry none. Also, skip in the + * unlikely case dst entry !none as this implies + * sharing with another vma. + */ ; } else if (unlikely(is_hugetlb_entry_migration(entry) || is_hugetlb_entry_hwpoisoned(entry))) { -- cgit v1.2.3-59-g8ed1b From f341e16fb67ddc199582d187b0d39120a9dfd2bf Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 16 Nov 2018 15:08:08 -0800 Subject: MAINTAINERS: update OMAP MMC entry Jarkko's e-mail address hasn't worked for a long time. We still want to keep this driver working as it is critical for some of the OMAP boards. I use and test this driver frequently, so change myself as a maintainer with "Odd Fixes" status. Link: http://lkml.kernel.org/r/20181106222750.12939-1-aaro.koskinen@iki.fi Signed-off-by: Aaro Koskinen Acked-by: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- CREDITS | 4 ++++ MAINTAINERS | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 5befd2d714d0..84cbec4c6211 100644 --- a/CREDITS +++ b/CREDITS @@ -2138,6 +2138,10 @@ E: paul@laufernet.com D: Soundblaster driver fixes, ISAPnP quirk S: California, USA +N: Jarkko Lavinen +E: jarkko.lavinen@nokia.com +D: OMAP MMC support + N: Jonathan Layes D: ARPD support diff --git a/MAINTAINERS b/MAINTAINERS index 6c3fbbb361f8..b755a89fa325 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10808,9 +10808,9 @@ F: drivers/media/platform/omap3isp/ F: drivers/staging/media/omap4iss/ OMAP MMC SUPPORT -M: Jarkko Lavinen +M: Aaro Koskinen L: linux-omap@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/mmc/host/omap.c OMAP POWER MANAGEMENT SUPPORT -- cgit v1.2.3-59-g8ed1b From 873d7bcfd066663e3e50113dc4a0de19289b6354 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 16 Nov 2018 15:08:11 -0800 Subject: mm/swapfile.c: use kvzalloc for swap_info_struct allocation Commit a2468cc9bfdf ("swap: choose swap device according to numa node") changed 'avail_lists' field of 'struct swap_info_struct' to an array. In popular linux distros it increased size of swap_info_struct up to 40 Kbytes and now swap_info_struct allocation requires order-4 page. Switch to kvzmalloc allows to avoid unexpected allocation failures. Link: http://lkml.kernel.org/r/fc23172d-3c75-21e2-d551-8b1808cbe593@virtuozzo.com Fixes: a2468cc9bfdf ("swap: choose swap device according to numa node") Signed-off-by: Vasily Averin Acked-by: Aaron Lu Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Huang Ying Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swapfile.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 644f746e167a..8688ae65ef58 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2813,7 +2813,7 @@ static struct swap_info_struct *alloc_swap_info(void) unsigned int type; int i; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kvzalloc(sizeof(*p), GFP_KERNEL); if (!p) return ERR_PTR(-ENOMEM); @@ -2824,7 +2824,7 @@ static struct swap_info_struct *alloc_swap_info(void) } if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); - kfree(p); + kvfree(p); return ERR_PTR(-EPERM); } if (type >= nr_swapfiles) { @@ -2838,7 +2838,7 @@ static struct swap_info_struct *alloc_swap_info(void) smp_wmb(); nr_swapfiles++; } else { - kfree(p); + kvfree(p); p = swap_info[type]; /* * Do not memset this entry: a racing procfs swap_next() -- cgit v1.2.3-59-g8ed1b From 9d7899999c62c1a81129b76d2a6ecbc4655e1597 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 16 Nov 2018 15:08:15 -0800 Subject: mm, memory_hotplug: check zone_movable in has_unmovable_pages Page state checks are racy. Under a heavy memory workload (e.g. stress -m 200 -t 2h) it is quite easy to hit a race window when the page is allocated but its state is not fully populated yet. A debugging patch to dump the struct page state shows has_unmovable_pages: pfn:0x10dfec00, found:0x1, count:0x0 page:ffffea0437fb0000 count:1 mapcount:1 mapping:ffff880e05239841 index:0x7f26e5000 compound_mapcount: 1 flags: 0x5fffffc0090034(uptodate|lru|active|head|swapbacked) Note that the state has been checked for both PageLRU and PageSwapBacked already. Closing this race completely would require some sort of retry logic. This can be tricky and error prone (think of potential endless or long taking loops). Workaround this problem for movable zones at least. Such a zone should only contain movable pages. Commit 15c30bc09085 ("mm, memory_hotplug: make has_unmovable_pages more robust") has told us that this is not strictly true though. Bootmem pages should be marked reserved though so we can move the original check after the PageReserved check. Pages from other zones are still prone to races but we even do not pretend that memory hotremove works for those so pre-mature failure doesn't hurt that much. Link: http://lkml.kernel.org/r/20181106095524.14629-1-mhocko@kernel.org Fixes: 15c30bc09085 ("mm, memory_hotplug: make has_unmovable_pages more robust") Signed-off-by: Michal Hocko Reported-by: Baoquan He Tested-by: Baoquan He Acked-by: Baoquan He Reviewed-by: Oscar Salvador Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a919ba5cb3c8..a0e5ec0addd2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7788,6 +7788,14 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, if (PageReserved(page)) goto unmovable; + /* + * If the zone is movable and we have ruled out all reserved + * pages then it should be reasonably safe to assume the rest + * is movable. + */ + if (zone_idx(zone) == ZONE_MOVABLE) + continue; + /* * Hugepages are not in LRU lists, but they're movable. * We need not scan over tail pages bacause we don't -- cgit v1.2.3-59-g8ed1b From a76cf1a474d7dbcd9336b5f5afb0162baa142cf0 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 16 Nov 2018 15:08:18 -0800 Subject: mm: don't reclaim inodes with many attached pages Spock reported that commit 172b06c32b94 ("mm: slowly shrink slabs with a relatively small number of objects") leads to a regression on his setup: periodically the majority of the pagecache is evicted without an obvious reason, while before the change the amount of free memory was balancing around the watermark. The reason behind is that the mentioned above change created some minimal background pressure on the inode cache. The problem is that if an inode is considered to be reclaimed, all belonging pagecache page are stripped, no matter how many of them are there. So, if a huge multi-gigabyte file is cached in the memory, and the goal is to reclaim only few slab objects (unused inodes), we still can eventually evict all gigabytes of the pagecache at once. The workload described by Spock has few large non-mapped files in the pagecache, so it's especially noticeable. To solve the problem let's postpone the reclaim of inodes, which have more than 1 attached page. Let's wait until the pagecache pages will be evicted naturally by scanning the corresponding LRU lists, and only then reclaim the inode structure. Link: http://lkml.kernel.org/r/20181023164302.20436-1-guro@fb.com Signed-off-by: Roman Gushchin Reported-by: Spock Tested-by: Spock Reviewed-by: Andrew Morton Cc: Michal Hocko Cc: Rik van Riel Cc: Randy Dunlap Cc: [4.19.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inode.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 9e198f00b64c..35d2108d567c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -730,8 +730,11 @@ static enum lru_status inode_lru_isolate(struct list_head *item, return LRU_REMOVED; } - /* recently referenced inodes get one more pass */ - if (inode->i_state & I_REFERENCED) { + /* + * Recently referenced inodes and inodes with many attached pages + * get one more pass. + */ + if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) { inode->i_state &= ~I_REFERENCED; spin_unlock(&inode->i_lock); return LRU_ROTATE; -- cgit v1.2.3-59-g8ed1b From f5f67cc0e0d3d17ee046ba83f831f267767b8554 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 16 Nov 2018 15:08:22 -0800 Subject: scripts/faddr2line: fix location of start_kernel in comment Fix a source file reference location to the correct path name. Link: http://lkml.kernel.org/r/1d50bd3d-178e-dcd8-779f-9711887440eb@infradead.org Signed-off-by: Randy Dunlap Acked-by: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/faddr2line | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index a0149db00be7..6c6439f69a72 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -71,7 +71,7 @@ die() { # Try to figure out the source directory prefix so we can remove it from the # addr2line output. HACK ALERT: This assumes that start_kernel() is in -# kernel/init.c! This only works for vmlinux. Otherwise it falls back to +# init/main.c! This only works for vmlinux. Otherwise it falls back to # printing the absolute path. find_dir_prefix() { local objfile=$1 -- cgit v1.2.3-59-g8ed1b From 5040f8df56fb90c7919f1c9b0b6e54c843437456 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 16 Nov 2018 15:08:25 -0800 Subject: ocfs2: free up write context when direct IO failed The write context should also be freed even when direct IO failed. Otherwise a memory leak is introduced and entries remain in oi->ip_unwritten_list causing the following BUG later in unlink path: ERROR: bug expression: !list_empty(&oi->ip_unwritten_list) ERROR: Clear inode of 215043, inode has unwritten extents ... Call Trace: ? __set_current_blocked+0x42/0x68 ocfs2_evict_inode+0x91/0x6a0 [ocfs2] ? bit_waitqueue+0x40/0x33 evict+0xdb/0x1af iput+0x1a2/0x1f7 do_unlinkat+0x194/0x28f SyS_unlinkat+0x1b/0x2f do_syscall_64+0x79/0x1ae entry_SYSCALL_64_after_hwframe+0x151/0x0 This patch also logs, with frequency limit, direct IO failures. Link: http://lkml.kernel.org/r/20181102170632.25921-1-wen.gang.wang@oracle.com Signed-off-by: Wengang Wang Reviewed-by: Junxiao Bi Reviewed-by: Changwei Ge Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 12 ++++++++++-- fs/ocfs2/cluster/masklog.h | 9 +++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index da578ad4c08f..eb1ce30412dc 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2411,8 +2411,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); - if (bytes > 0 && private) - ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); + if (bytes <= 0) + mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld", + (long long)bytes); + if (private) { + if (bytes > 0) + ret = ocfs2_dio_end_io_write(inode, private, offset, + bytes); + else + ocfs2_dio_free_write_ctx(inode, private); + } ocfs2_iocb_clear_rw_locked(iocb); diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 308ea0eb35fd..a396096a5099 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -178,6 +178,15 @@ do { \ ##__VA_ARGS__); \ } while (0) +#define mlog_ratelimited(mask, fmt, ...) \ +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + if (__ratelimit(&_rs)) \ + mlog(mask, fmt, ##__VA_ARGS__); \ +} while (0) + #define mlog_errno(st) ({ \ int _st = (st); \ if (_st != -ERESTARTSYS && _st != -EINTR && \ -- cgit v1.2.3-59-g8ed1b From 78179556e7605ba07fd3ddba6ab8891fa457b93e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 16 Nov 2018 15:08:29 -0800 Subject: mm/gup.c: fix follow_page_mask() kerneldoc comment Commit df06b37ffe5a ("mm/gup: cache dev_pagemap while pinning pages") modified the signature of follow_page_mask() but left the parameter description behind. Update the description to make the code and comments agree again. While at it, update formatting of the return value description to match Documentation/doc-guide/kernel-doc.rst guidelines. Link: http://lkml.kernel.org/r/1541603316-27832-1-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index f76e77a2d34b..aa43620a3270 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -385,11 +385,17 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, * @vma: vm_area_struct mapping @address * @address: virtual address to look up * @flags: flags modifying lookup behaviour - * @page_mask: on output, *page_mask is set according to the size of the page + * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a + * pointer to output page_mask * * @flags can have FOLL_ flags set, defined in * - * Returns the mapped (struct page *), %NULL if no mapping exists, or + * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches + * the device's dev_pagemap metadata to avoid repeating expensive lookups. + * + * On output, the @ctx->page_mask is set according to the size of the page. + * + * Return: the mapped (struct page *), %NULL if no mapping exists, or * an error pointer if there is a mapping to something not represented * by a page descriptor (see also vm_normal_page()). */ -- cgit v1.2.3-59-g8ed1b From 13c9aaf7fa01cc7600c61981609feadeef3354ec Mon Sep 17 00:00:00 2001 From: Janne Huttunen Date: Fri, 16 Nov 2018 15:08:32 -0800 Subject: mm/vmstat.c: fix NUMA statistics updates Scan through the whole array to see if an update is needed. While we're at it, use sizeof() to be safe against any possible type changes in the future. The bug here is that we wouldn't sync per-cpu counters into global ones if there was an update of numa_stats for higher cpus. Highly theoretical one though because it is much more probable that zone_stats are updated so we would refresh anyway. So I wouldn't bother to mark this for stable, yet something nice to fix. [mhocko@suse.com: changelog enhancement] Link: http://lkml.kernel.org/r/1541601517-17282-1-git-send-email-janne.huttunen@nokia.com Fixes: 1d90ca897cb0 ("mm: update NUMA counter threshold size") Signed-off-by: Janne Huttunen Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 6038ce593ce3..9c624595e904 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1827,12 +1827,13 @@ static bool need_update(int cpu) /* * The fast way of checking if there are any vmstat diffs. - * This works because the diffs are byte sized items. */ - if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) + if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * + sizeof(p->vm_stat_diff[0]))) return true; #ifdef CONFIG_NUMA - if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS)) + if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * + sizeof(p->vm_numa_stat_diff[0]))) return true; #endif } -- cgit v1.2.3-59-g8ed1b From 1c23b4108d716cc848b38532063a8aca4f86add8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 16 Nov 2018 15:08:35 -0800 Subject: lib/ubsan.c: don't mark __ubsan_handle_builtin_unreachable as noreturn gcc-8 complains about the prototype for this function: lib/ubsan.c:432:1: error: ignoring attribute 'noreturn' in declaration of a built-in function '__ubsan_handle_builtin_unreachable' because it conflicts with attribute 'const' [-Werror=attributes] This is actually a GCC's bug. In GCC internals __ubsan_handle_builtin_unreachable() declared with both 'noreturn' and 'const' attributes instead of only 'noreturn': https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84210 Workaround this by removing the noreturn attribute. [aryabinin: add information about GCC bug in changelog] Link: http://lkml.kernel.org/r/20181107144516.4587-1-aryabinin@virtuozzo.com Signed-off-by: Arnd Bergmann Signed-off-by: Andrey Ryabinin Acked-by: Olof Johansson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/ubsan.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/ubsan.c b/lib/ubsan.c index 59fee96c29a0..e4162f59a81c 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -427,8 +427,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data, EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds); -void __noreturn -__ubsan_handle_builtin_unreachable(struct unreachable_data *data) +void __ubsan_handle_builtin_unreachable(struct unreachable_data *data) { unsigned long flags; -- cgit v1.2.3-59-g8ed1b From 1a413646931cb14442065cfc17561e50f5b5bb44 Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 16 Nov 2018 15:08:39 -0800 Subject: tmpfs: make lseek(SEEK_DATA/SEK_HOLE) return ENXIO with a negative offset Other filesystems such as ext4, f2fs and ubifs all return ENXIO when lseek (SEEK_DATA or SEEK_HOLE) requests a negative offset. man 2 lseek says : EINVAL whence is not valid. Or: the resulting file offset would be : negative, or beyond the end of a seekable device. : : ENXIO whence is SEEK_DATA or SEEK_HOLE, and the file offset is beyond : the end of the file. Make tmpfs return ENXIO under these circumstances as well. After this, tmpfs also passes xfstests's generic/448. [akpm@linux-foundation.org: rewrite changelog] Link: http://lkml.kernel.org/r/1540434176-14349-1-git-send-email-yuyufen@huawei.com Signed-off-by: Yufen Yu Reviewed-by: Andrew Morton Cc: Al Viro Cc: Hugh Dickins Cc: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index ea26d7a0342d..d44991ea5ed4 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2563,9 +2563,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence) inode_lock(inode); /* We're holding i_mutex so we can access i_size directly */ - if (offset < 0) - offset = -EINVAL; - else if (offset >= inode->i_size) + if (offset < 0 || offset >= inode->i_size) offset = -ENXIO; else { start = offset >> PAGE_SHIFT; -- cgit v1.2.3-59-g8ed1b From 6f4d29df66acd49303a99025046b85cabe7aa17a Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 16 Nov 2018 15:08:43 -0800 Subject: scripts/spdxcheck.py: make python3 compliant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this change the following happens when using Python3 (3.6.6): $ echo "GPL-2.0" | python3 scripts/spdxcheck.py - FAIL: 'str' object has no attribute 'decode' Traceback (most recent call last): File "scripts/spdxcheck.py", line 253, in parser.parse_lines(sys.stdin, args.maxlines, '-') File "scripts/spdxcheck.py", line 171, in parse_lines line = line.decode(locale.getpreferredencoding(False), errors='ignore') AttributeError: 'str' object has no attribute 'decode' So as the line is already a string, there is no need to decode it and the line can be dropped. /usr/bin/python on Arch is Python 3. So this would indeed be worth going into 4.19. Link: http://lkml.kernel.org/r/20181023070802.22558-1-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Cc: Thomas Gleixner Cc: Joe Perches Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/spdxcheck.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py index 839e190bbd7a..5056fb3b897d 100755 --- a/scripts/spdxcheck.py +++ b/scripts/spdxcheck.py @@ -168,7 +168,6 @@ class id_parser(object): self.curline = 0 try: for line in fd: - line = line.decode(locale.getpreferredencoding(False), errors='ignore') self.curline += 1 if self.curline > maxlines: break -- cgit v1.2.3-59-g8ed1b From c63ae43ba53bc432b414fd73dd5f4b01fcb1ab43 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 16 Nov 2018 15:08:53 -0800 Subject: mm, page_alloc: check for max order in hot path Konstantin has noticed that kvmalloc might trigger the following warning: WARNING: CPU: 0 PID: 6676 at mm/vmstat.c:986 __fragmentation_index+0x54/0x60 [...] Call Trace: fragmentation_index+0x76/0x90 compaction_suitable+0x4f/0xf0 shrink_node+0x295/0x310 node_reclaim+0x205/0x250 get_page_from_freelist+0x649/0xad0 __alloc_pages_nodemask+0x12a/0x2a0 kmalloc_large_node+0x47/0x90 __kmalloc_node+0x22b/0x2e0 kvmalloc_node+0x3e/0x70 xt_alloc_table_info+0x3a/0x80 [x_tables] do_ip6t_set_ctl+0xcd/0x1c0 [ip6_tables] nf_setsockopt+0x44/0x60 SyS_setsockopt+0x6f/0xc0 do_syscall_64+0x67/0x120 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 the problem is that we only check for an out of bound order in the slow path and the node reclaim might happen from the fast path already. This is fixable by making sure that kvmalloc doesn't ever use kmalloc for requests that are larger than KMALLOC_MAX_SIZE but this also shows that the code is rather fragile. A recent UBSAN report just underlines that by the following report UBSAN: Undefined behaviour in mm/page_alloc.c:3117:19 shift exponent 51 is too large for 32-bit type 'int' CPU: 0 PID: 6520 Comm: syz-executor1 Not tainted 4.19.0-rc2 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xd2/0x148 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x94 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x2b6/0x30b lib/ubsan.c:425 __zone_watermark_ok+0x2c7/0x400 mm/page_alloc.c:3117 zone_watermark_fast mm/page_alloc.c:3216 [inline] get_page_from_freelist+0xc49/0x44c0 mm/page_alloc.c:3300 __alloc_pages_nodemask+0x21e/0x640 mm/page_alloc.c:4370 alloc_pages_current+0xcc/0x210 mm/mempolicy.c:2093 alloc_pages include/linux/gfp.h:509 [inline] __get_free_pages+0x12/0x60 mm/page_alloc.c:4414 dma_mem_alloc+0x36/0x50 arch/x86/include/asm/floppy.h:156 raw_cmd_copyin drivers/block/floppy.c:3159 [inline] raw_cmd_ioctl drivers/block/floppy.c:3206 [inline] fd_locked_ioctl+0xa00/0x2c10 drivers/block/floppy.c:3544 fd_ioctl+0x40/0x60 drivers/block/floppy.c:3571 __blkdev_driver_ioctl block/ioctl.c:303 [inline] blkdev_ioctl+0xb3c/0x1a30 block/ioctl.c:601 block_ioctl+0x105/0x150 fs/block_dev.c:1883 vfs_ioctl fs/ioctl.c:46 [inline] do_vfs_ioctl+0x1c0/0x1150 fs/ioctl.c:687 ksys_ioctl+0x9e/0xb0 fs/ioctl.c:702 __do_sys_ioctl fs/ioctl.c:709 [inline] __se_sys_ioctl fs/ioctl.c:707 [inline] __x64_sys_ioctl+0x7e/0xc0 fs/ioctl.c:707 do_syscall_64+0xc4/0x510 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Note that this is not a kvmalloc path. It is just that the fast path really depends on having sanitzed order as well. Therefore move the order check to the fast path. Link: http://lkml.kernel.org/r/20181113094305.GM15120@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Konstantin Khlebnikov Reported-by: Kyungtae Kim Acked-by: Vlastimil Babka Cc: Balbir Singh Cc: Mel Gorman Cc: Pavel Tatashin Cc: Oscar Salvador Cc: Mike Rapoport Cc: Aaron Lu Cc: Joonsoo Kim Cc: Byoungyoung Lee Cc: "Dae R. Jeong" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a0e5ec0addd2..6847177dc4a1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4060,17 +4060,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; int reserve_flags; - /* - * In the slowpath, we sanity check order to avoid ever trying to - * reclaim >= MAX_ORDER areas which will never succeed. Callers may - * be using allocators in order of preference for an area that is - * too large. - */ - if (order >= MAX_ORDER) { - WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); - return NULL; - } - /* * We also sanity check to catch abuse of atomic reserves being used by * callers that are not in atomic context. @@ -4364,6 +4353,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ struct alloc_context ac = { }; + /* + * There are several places where we assume that the order value is sane + * so bail out early if the request is out of bound. + */ + if (unlikely(order >= MAX_ORDER)) { + WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN)); + return NULL; + } + gfp_mask &= gfp_allowed_mask; alloc_mask = gfp_mask; if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) -- cgit v1.2.3-59-g8ed1b From 45e79815b89149dc6698e71b587c86ffaf4062aa Mon Sep 17 00:00:00 2001 From: Chen Chang Date: Fri, 16 Nov 2018 15:08:57 -0800 Subject: mm/memblock.c: fix a typo in __next_mem_pfn_range() comments Link: http://lkml.kernel.org/r/20181107100247.13359-1-rainccrun@gmail.com Signed-off-by: Chen Chang Acked-by: Michal Hocko Acked-by: Mike Rapoport Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 7df468c8ebc8..9a2d5ae81ae1 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1179,7 +1179,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP /* - * Common iterator interface used to define for_each_mem_range(). + * Common iterator interface used to define for_each_mem_pfn_range(). */ void __init_memblock __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, -- cgit v1.2.3-59-g8ed1b From 9ff01193a20d391e8dbce4403dd5ef87c7eaaca6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Nov 2018 13:33:44 -0800 Subject: Linux 4.20-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2f36db897895..ddbf627cad8f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 20 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = "People's Front" # *DOCUMENTATION* -- cgit v1.2.3-59-g8ed1b From 7ddacfa564870cdd97275fd87decb6174abc6380 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 18 Nov 2018 10:45:30 -0800 Subject: ipv6: Fix PMTU updates for UDP/raw sockets in presence of VRF Preethi reported that PMTU discovery for UDP/raw applications is not working in the presence of VRF when the socket is not bound to a device. The problem is that ip6_sk_update_pmtu does not consider the L3 domain of the skb device if the socket is not bound. Update the function to set oif to the L3 master device if relevant. Fixes: ca254490c8df ("net: Add VRF support to IPv6 stack") Reported-by: Preethi Ramachandra Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 14b422f35504..059f0531f7c1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2359,10 +2359,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu); void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) { + int oif = sk->sk_bound_dev_if; struct dst_entry *dst; - ip6_update_pmtu(skb, sock_net(sk), mtu, - sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); + if (!oif && skb->dev) + oif = l3mdev_master_ifindex(skb->dev); + + ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid); dst = __sk_dst_get(sk); if (!dst || !dst->obsolete || -- cgit v1.2.3-59-g8ed1b From 8ebebcba559a1bfbaec7bbda64feb9870b9c58da Mon Sep 17 00:00:00 2001 From: Matthew Cover Date: Sun, 18 Nov 2018 00:46:00 -0700 Subject: tuntap: fix multiqueue rx When writing packets to a descriptor associated with a combined queue, the packets should end up on that queue. Before this change all packets written to any descriptor associated with a tap interface end up on rx-0, even when the descriptor is associated with a different queue. The rx traffic can be generated by either of the following. 1. a simple tap program which spins up multiple queues and writes packets to each of the file descriptors 2. tx from a qemu vm with a tap multiqueue netdev The queue for rx traffic can be observed by either of the following (done on the hypervisor in the qemu case). 1. a simple netmap program which opens and reads from per-queue descriptors 2. configuring RPS and doing per-cpu captures with rxtxcpu Alternatively, if you printk() the return value of skb_get_rx_queue() just before each instance of netif_receive_skb() in tun.c, you will get 65535 for every skb. Calling skb_record_rx_queue() to set the rx queue to the queue_index fixes the association between descriptor and rx queue. Signed-off-by: Matthew Cover Signed-off-by: David S. Miller --- drivers/net/tun.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 060135ceaf0e..e244f5d7512a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1536,6 +1536,7 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, if (!rx_batched || (!more && skb_queue_empty(queue))) { local_bh_disable(); + skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); return; @@ -1555,8 +1556,11 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile, struct sk_buff *nskb; local_bh_disable(); - while ((nskb = __skb_dequeue(&process_queue))) + while ((nskb = __skb_dequeue(&process_queue))) { + skb_record_rx_queue(nskb, tfile->queue_index); netif_receive_skb(nskb); + } + skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); local_bh_enable(); } @@ -2451,6 +2455,7 @@ build: if (!rcu_dereference(tun->steering_prog)) rxhash = __skb_get_hash_symmetric(skb); + skb_record_rx_queue(skb, tfile->queue_index); netif_receive_skb(skb); stats = get_cpu_ptr(tun->pcpu_stats); -- cgit v1.2.3-59-g8ed1b