From 344bf332ceb2364a2fcd3ab4133dce5ea35c2594 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Sat, 30 Mar 2019 21:13:46 +0800 Subject: arm64: mm: fix incorrect assignment of 'max_mapnr' Although we don't actually make use of the 'max_mapnr' global variable, we do set it to a junk value for !CONFIG_FLATMEM configurations that leave mem_map uninitialised. To avoid somebody tripping over this in future, set 'max_mapnr' using max_pfn, which is calculated directly from the memblock information. Reviewed-by: Catalin Marinas Signed-off-by: Muchun Song Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 6bc135042f5e..c29b17b520cd 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -535,7 +535,7 @@ void __init mem_init(void) else swiotlb_force = SWIOTLB_NO_FORCE; - set_max_mapnr(pfn_to_page(max_pfn) - mem_map); + set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); #ifndef CONFIG_SPARSEMEM_VMEMMAP free_unused_memmap(); -- cgit v1.2.3-59-g8ed1b From b1ce45e86b81e8ce354c47a7440d263e4cd5dc56 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 14 Mar 2019 12:16:19 -0400 Subject: arm64/mm: fix kernel-doc comments Building a kernel with W=1 generates several warnings due to abuse of kernel-doc comments: | arch/arm64/mm/numa.c:281: warning: Cannot understand * | on line 281 - I thought it was a doc line Tidy up the comments to remove the warnings. Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Signed-off-by: Qian Cai Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 06a6f264f2dd..5202f63c29c9 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -124,7 +124,7 @@ static void __init setup_node_to_cpumask_map(void) } /* - * Set the cpu to node and mem mapping + * Set the cpu to node and mem mapping */ void numa_store_cpu_info(unsigned int cpu) { @@ -200,7 +200,7 @@ void __init setup_per_cpu_areas(void) #endif /** - * numa_add_memblk - Set node id to memblk + * numa_add_memblk() - Set node id to memblk * @nid: NUMA node ID of the new memblk * @start: Start address of the new memblk * @end: End address of the new memblk @@ -223,7 +223,7 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) return ret; } -/** +/* * Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) @@ -257,7 +257,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; } -/** +/* * numa_free_distance * * The current table is freed. @@ -277,10 +277,8 @@ void __init numa_free_distance(void) numa_distance = NULL; } -/** - * +/* * Create a new NUMA distance table. - * */ static int __init numa_alloc_distance(void) { @@ -311,7 +309,7 @@ static int __init numa_alloc_distance(void) } /** - * numa_set_distance - Set inter node NUMA distance from node to node. + * numa_set_distance() - Set inter node NUMA distance from node to node. * @from: the 'from' node to set distance * @to: the 'to' node to set distance * @distance: NUMA distance @@ -321,7 +319,6 @@ static int __init numa_alloc_distance(void) * * If @from or @to is higher than the highest known node or lower than zero * or @distance doesn't make sense, the call is ignored. 
- * */ void __init numa_set_distance(int from, int to, int distance) { @@ -347,7 +344,7 @@ void __init numa_set_distance(int from, int to, int distance) numa_distance[from * numa_distance_cnt + to] = distance; } -/** +/* * Return NUMA distance @from to @to */ int __node_distance(int from, int to) @@ -422,13 +419,15 @@ out_free_distance: } /** - * dummy_numa_init - Fallback dummy NUMA init + * dummy_numa_init() - Fallback dummy NUMA init * * Used if there's no underlying NUMA architecture, NUMA initialization * fails, or NUMA is disabled on the command line. * * Must online at least one node (node 0) and add memory blocks that cover all * allowed memory. It is unlikely that this function fails. + * + * Return: 0 on success, -errno on failure. */ static int __init dummy_numa_init(void) { @@ -454,9 +453,9 @@ static int __init dummy_numa_init(void) } /** - * arm64_numa_init - Initialize NUMA + * arm64_numa_init() - Initialize NUMA * - * Try each configured NUMA initialization method until one succeeds. The + * Try each configured NUMA initialization method until one succeeds. The * last fallback is dummy single node config encomapssing whole memory. */ void __init arm64_numa_init(void) -- cgit v1.2.3-59-g8ed1b From 19d6242ece1f35aa004a7da9adf52e0ec18a854e Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 21 Mar 2019 12:21:25 +0800 Subject: arm64: setup min_low_pfn When debugging with CONFIG_PAGE_OWNER, I noticed that the min_low_pfn on arm64 is always zero and the page owner scanning has to start from zero. We have to loop a while before we see the first valid pfn. (see: read_page_owner()) Setup min_low_pfn to save some loops. Before setting min_low_pfn: [ 21.265602] min_low_pfn=0, *ppos=0 Page allocated via order 0, mask 0x100cca(GFP_HIGHUSER_MOVABLE) PFN 262144 type Movable Block 512 type Movable Flags 0x8001e referenced|uptodate|dirty|lru|swapbacked) prep_new_page+0x13c/0x140 get_page_from_freelist+0x254/0x1068 __alloc_pages_nodemask+0xd4/0xcb8 After setting min_low_pfn: [ 11.025787] min_low_pfn=262144, *ppos=0 Page allocated via order 0, mask 0x100cca(GFP_HIGHUSER_MOVABLE) PFN 262144 type Movable Block 512 type Movable Flags 0x8001e referenced|uptodate|dirty|lru|swapbacked) prep_new_page+0x13c/0x140 get_page_from_freelist+0x254/0x1068 __alloc_pages_nodemask+0xd4/0xcb8 shmem_alloc_page+0x7c/0xa0 shmem_alloc_and_acct_page+0x124/0x1e8 shmem_getpage_gfp.isra.7+0x118/0x878 shmem_write_begin+0x38/0x68 Signed-off-by: Miles Chen Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index c29b17b520cd..43fa75601b3d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -440,6 +440,7 @@ void __init bootmem_init(void) early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT); max_pfn = max_low_pfn = max; + min_low_pfn = min; arm64_numa_init(); /* -- cgit v1.2.3-59-g8ed1b From 0f1bf7e39822476b2f921435cf990f67a61f5f92 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 1 Apr 2019 12:44:47 +0200 Subject: arm64/vdso: don't leak kernel addresses Since commit ad67b74d2469d9b8 ("printk: hash addresses printed with %p"), two obfuscated kernel pointer are printed at every boot: vdso: 2 pages (1 code @ (____ptrval____), 1 data @ (____ptrval____)) Remove the the print completely, as it's useless without the addresses. 
Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Acked-by: Mark Rutland Signed-off-by: Matteo Croce Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 2d419006ad43..9fb0c0c95a85 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -146,8 +146,6 @@ static int __init vdso_init(void) } vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), -- cgit v1.2.3-59-g8ed1b From 7048a5973eb1d6a747b516334c052d92d8ed7ab4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Apr 2019 13:36:54 +0100 Subject: arm64: mm: Make show_pte() a static function show_pte() doesn't have any external callers, so make it static. Signed-off-by: Will Deacon --- arch/arm64/include/asm/system_misc.h | 1 - arch/arm64/mm/fault.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index 32693f34f431..fca95424e873 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -41,7 +41,6 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, int sig, int code, const char *name); struct mm_struct; -extern void show_pte(unsigned long addr); extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1a7e92ab69eb..4f343e603925 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -148,7 +148,7 @@ static inline bool is_ttbr1_addr(unsigned long addr) /* * Dump out the page tables associated with 'addr' in the currently active mm. */ -void show_pte(unsigned long addr) +static void show_pte(unsigned long addr) { struct mm_struct *mm; pgd_t *pgdp; -- cgit v1.2.3-59-g8ed1b From 92606ec9285fb84cd9b5943df23f07d741384bfc Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Tue, 5 Mar 2019 19:34:05 +0800 Subject: arm64: cpu_ops: fix a leaked reference by adding missing of_node_put The call to of_get_next_child returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./arch/arm64/kernel/cpu_ops.c:102:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 69, but without a corresponding object release within this function. 
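For reference, the refcounting pattern being fixed looks roughly like the sketch below (illustrative only; it mirrors the generic OF API usage rather than the exact cpu_ops.c code):

	struct device_node *dn = of_get_cpu_node(cpu, NULL);	/* takes a reference */

	if (dn) {
		enable_method = of_get_property(dn, "enable-method", NULL);
		if (!enable_method)
			pr_err("%pOF: missing enable-method property\n", dn);
		of_node_put(dn);	/* balance the reference taken above */
	}

Without the final of_node_put(), the node's refcount stays elevated on every call, which is exactly what coccinelle flags.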
Signed-off-by: Wen Yang Reviewed-by: Florian Fainelli Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_ops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index ea001241bdd4..00f8b8612b69 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -85,6 +85,7 @@ static const char *__init cpu_read_enable_method(int cpu) pr_err("%pOF: missing enable-method property\n", dn); } + of_node_put(dn); } else { enable_method = acpi_get_enable_method(cpu); if (!enable_method) { -- cgit v1.2.3-59-g8ed1b From 70b3d237bd7f52d3322c750a358581bf5a267699 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Apr 2019 17:58:39 +0100 Subject: arm64: mm: Ensure we ignore the initrd if it is placed out of range If the initrd payload isn't completely accessible via the linear map, then we print a warning during boot and nobble the virtual address of the payload so that we ignore it later on. Unfortunately, since commit c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size"), the virtual address isn't initialised until later anyway, so we need to nobble the size of the payload to ensure that we don't try to use it later on. Fixes: c756c592e442 ("arm64: Utilize phys_initrd_start/phys_initrd_size") Reported-by: Pierre Kuo Acked-by: Florian Fainelli Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 43fa75601b3d..03a8a6888ec0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -377,7 +377,7 @@ void __init arm64_memblock_init(void) base + size > memblock_start_of_DRAM() + linear_region_size, "initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) { - initrd_start = 0; + phys_initrd_size = 0; } else { memblock_remove(base, size); /* clear MEMBLOCK_ flags */ memblock_add(base, size); -- cgit v1.2.3-59-g8ed1b From 697e96ed1720d6aad196e7a663d545df2edd44a4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 3 Apr 2019 17:48:22 +0900 Subject: arm64: vdso: fix and clean-up Makefile - $(call if_changed,...) must have FORCE as a prerequisite - vdso.lds is a generated file, so it should be prefixed with $(obj)/ instead of $(src)/. - cmd_vdsosym is a one-liner rule, so the assignment with '=' is simpler. 
Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index b215c712d897..a0af6bf6c11b 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -31,7 +31,7 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH) $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first -$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) # Strip rule for the .so file @@ -42,9 +42,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # Generate VDSO offsets using helper script gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ -define cmd_vdsosym - $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ -endef + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) @@ -55,7 +53,7 @@ $(obj-vdso): %.o: %.S FORCE # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $(real-prereqs) -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< -- cgit v1.2.3-59-g8ed1b From 24e516049360eda85cf3fe9903221d43886c2689 Mon Sep 17 00:00:00 2001 From: Neil Leeder Date: Tue, 26 Mar 2019 15:17:50 +0000 Subject: ACPI/IORT: Add support for PMCG Add support for the SMMU Performance Monitor Counter Group information from ACPI. This is in preparation for its use in the SMMUv3 PMU driver. Signed-off-by: Neil Leeder Signed-off-by: Hanjun Guo Signed-off-by: Shameer Kolothum Reviewed-by: Robin Murphy Acked-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 117 ++++++++++++++++++++++++++++++++++++---------- include/linux/acpi_iort.h | 7 +++ 2 files changed, 100 insertions(+), 24 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e48894e002ba..e2c9b26bbee6 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -356,7 +356,8 @@ static struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) { if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT || node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX || - node->type == ACPI_IORT_NODE_SMMU_V3) { + node->type == ACPI_IORT_NODE_SMMU_V3 || + node->type == ACPI_IORT_NODE_PMCG) { *id_out = map->output_base; return parent; } @@ -394,6 +395,8 @@ static int iort_get_id_mapping_index(struct acpi_iort_node *node) } return smmu->id_mapping_index; + case ACPI_IORT_NODE_PMCG: + return 0; default: return -EINVAL; } @@ -1218,14 +1221,23 @@ static void __init arm_smmu_v3_init_resources(struct resource *res, } } -static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) +static void __init arm_smmu_v3_dma_configure(struct device *dev, + struct acpi_iort_node *node) { struct acpi_iort_smmu_v3 *smmu; + enum dev_dma_attr attr; /* Retrieve SMMUv3 specific data */ smmu = (struct acpi_iort_smmu_v3 *)node->node_data; - return smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE; + attr = (smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) ? 
+ DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT; + + /* We expect the dma masks to be equivalent for all SMMUv3 set-ups */ + dev->dma_mask = &dev->coherent_dma_mask; + + /* Configure DMA for the page table walker */ + acpi_dma_configure(dev, attr); } #if defined(CONFIG_ACPI_NUMA) @@ -1301,30 +1313,82 @@ static void __init arm_smmu_init_resources(struct resource *res, } } -static bool __init arm_smmu_is_coherent(struct acpi_iort_node *node) +static void __init arm_smmu_dma_configure(struct device *dev, + struct acpi_iort_node *node) { struct acpi_iort_smmu *smmu; + enum dev_dma_attr attr; /* Retrieve SMMU specific data */ smmu = (struct acpi_iort_smmu *)node->node_data; - return smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK; + attr = (smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK) ? + DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT; + + /* We expect the dma masks to be equivalent for SMMU set-ups */ + dev->dma_mask = &dev->coherent_dma_mask; + + /* Configure DMA for the page table walker */ + acpi_dma_configure(dev, attr); +} + +static int __init arm_smmu_v3_pmcg_count_resources(struct acpi_iort_node *node) +{ + struct acpi_iort_pmcg *pmcg; + + /* Retrieve PMCG specific data */ + pmcg = (struct acpi_iort_pmcg *)node->node_data; + + /* + * There are always 2 memory resources. + * If the overflow_gsiv is present then add that for a total of 3. + */ + return pmcg->overflow_gsiv ? 3 : 2; +} + +static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, + struct acpi_iort_node *node) +{ + struct acpi_iort_pmcg *pmcg; + + /* Retrieve PMCG specific data */ + pmcg = (struct acpi_iort_pmcg *)node->node_data; + + res[0].start = pmcg->page0_base_address; + res[0].end = pmcg->page0_base_address + SZ_4K - 1; + res[0].flags = IORESOURCE_MEM; + res[1].start = pmcg->page1_base_address; + res[1].end = pmcg->page1_base_address + SZ_4K - 1; + res[1].flags = IORESOURCE_MEM; + + if (pmcg->overflow_gsiv) + acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow", + ACPI_EDGE_SENSITIVE, &res[2]); +} + +static int __init arm_smmu_v3_pmcg_add_platdata(struct platform_device *pdev) +{ + u32 model = IORT_SMMU_V3_PMCG_GENERIC; + + return platform_device_add_data(pdev, &model, sizeof(model)); } struct iort_dev_config { const char *name; int (*dev_init)(struct acpi_iort_node *node); - bool (*dev_is_coherent)(struct acpi_iort_node *node); + void (*dev_dma_configure)(struct device *dev, + struct acpi_iort_node *node); int (*dev_count_resources)(struct acpi_iort_node *node); void (*dev_init_resources)(struct resource *res, struct acpi_iort_node *node); void (*dev_set_proximity)(struct device *dev, struct acpi_iort_node *node); + int (*dev_add_platdata)(struct platform_device *pdev); }; static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = { .name = "arm-smmu-v3", - .dev_is_coherent = arm_smmu_v3_is_coherent, + .dev_dma_configure = arm_smmu_v3_dma_configure, .dev_count_resources = arm_smmu_v3_count_resources, .dev_init_resources = arm_smmu_v3_init_resources, .dev_set_proximity = arm_smmu_v3_set_proximity, @@ -1332,9 +1396,16 @@ static const struct iort_dev_config iort_arm_smmu_v3_cfg __initconst = { static const struct iort_dev_config iort_arm_smmu_cfg __initconst = { .name = "arm-smmu", - .dev_is_coherent = arm_smmu_is_coherent, + .dev_dma_configure = arm_smmu_dma_configure, .dev_count_resources = arm_smmu_count_resources, - .dev_init_resources = arm_smmu_init_resources + .dev_init_resources = arm_smmu_init_resources, +}; + +static const struct iort_dev_config iort_arm_smmu_v3_pmcg_cfg __initconst = { + .name = 
"arm-smmu-v3-pmcg", + .dev_count_resources = arm_smmu_v3_pmcg_count_resources, + .dev_init_resources = arm_smmu_v3_pmcg_init_resources, + .dev_add_platdata = arm_smmu_v3_pmcg_add_platdata, }; static __init const struct iort_dev_config *iort_get_dev_cfg( @@ -1345,6 +1416,8 @@ static __init const struct iort_dev_config *iort_get_dev_cfg( return &iort_arm_smmu_v3_cfg; case ACPI_IORT_NODE_SMMU: return &iort_arm_smmu_cfg; + case ACPI_IORT_NODE_PMCG: + return &iort_arm_smmu_v3_pmcg_cfg; default: return NULL; } @@ -1362,7 +1435,6 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, struct fwnode_handle *fwnode; struct platform_device *pdev; struct resource *r; - enum dev_dma_attr attr; int ret, count; pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO); @@ -1393,19 +1465,19 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, goto dev_put; /* - * Add a copy of IORT node pointer to platform_data to - * be used to retrieve IORT data information. + * Platform devices based on PMCG nodes uses platform_data to + * pass the hardware model info to the driver. For others, add + * a copy of IORT node pointer to platform_data to be used to + * retrieve IORT data information. */ - ret = platform_device_add_data(pdev, &node, sizeof(node)); + if (ops->dev_add_platdata) + ret = ops->dev_add_platdata(pdev); + else + ret = platform_device_add_data(pdev, &node, sizeof(node)); + if (ret) goto dev_put; - /* - * We expect the dma masks to be equivalent for - * all SMMUs set-ups - */ - pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; - fwnode = iort_get_fwnode(node); if (!fwnode) { @@ -1415,11 +1487,8 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, pdev->dev.fwnode = fwnode; - attr = ops->dev_is_coherent && ops->dev_is_coherent(node) ? - DEV_DMA_COHERENT : DEV_DMA_NON_COHERENT; - - /* Configure DMA for the page table walker */ - acpi_dma_configure(&pdev->dev, attr); + if (ops->dev_dma_configure) + ops->dev_dma_configure(&pdev->dev, node); iort_set_device_domain(&pdev->dev, node); diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 38cd77b39a64..052ef7b9f985 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -26,6 +26,13 @@ #define IORT_IRQ_MASK(irq) (irq & 0xffffffffULL) #define IORT_IRQ_TRIGGER_MASK(irq) ((irq >> 32) & 0xffffffffULL) +/* + * PMCG model identifiers for use in smmu pmu driver. Please note + * that this is purely for the use of software and has nothing to + * do with hardware or with IORT specification. + */ +#define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ + int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); void iort_deregister_domain_token(int trans_id); -- cgit v1.2.3-59-g8ed1b From 7d839b4b9e00645e49345d6ce5dfa8edf53c1a21 Mon Sep 17 00:00:00 2001 From: Neil Leeder Date: Tue, 26 Mar 2019 15:17:51 +0000 Subject: perf/smmuv3: Add arm64 smmuv3 pmu driver Adds a new driver to support the SMMUv3 PMU and add it into the perf events framework. Each SMMU node may have multiple PMUs associated with it, each of which may support different events. SMMUv3 PMCG devices are named as smmuv3_pmcg_ where is the physical page address of the SMMU PMCG wrapped to 4K boundary. For example, the PMCG at 0xff88840000 is named smmuv3_pmcg_ff88840 Filtering by stream id is done by specifying filtering parameters with the event. 
options are: filter_enable - 0 = no filtering, 1 = filtering enabled filter_span - 0 = exact match, 1 = pattern match filter_stream_id - pattern to filter against Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1, filter_span=1,filter_stream_id=0x42/ -a netperf Applies filter pattern 0x42 to transaction events, which means events matching stream ids 0x42 & 0x43 are counted as only upper StreamID bits are required to match the given filter. Further filtering information is available in the SMMU documentation. SMMU events are not attributable to a CPU, so task mode and sampling are not supported. Signed-off-by: Neil Leeder Signed-off-by: Shameer Kolothum Reviewed-by: Robin Murphy [will: fold in review feedback from Robin] [will: rewrite Kconfig text and allow building as a module] Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 9 + drivers/perf/Makefile | 1 + drivers/perf/arm_smmuv3_pmu.c | 773 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 783 insertions(+) create mode 100644 drivers/perf/arm_smmuv3_pmu.c diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index af9bc178495d..a94e586a58b2 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -52,6 +52,15 @@ config ARM_PMU_ACPI depends on ARM_PMU && ACPI def_bool y +config ARM_SMMU_V3_PMU + tristate "ARM SMMUv3 Performance Monitors Extension" + depends on ARM64 && ACPI && ARM_SMMU_V3 + help + Provides support for the ARM SMMUv3 Performance Monitor Counter + Groups (PMCG), which provide monitoring of transactions passing + through the SMMU and allow the resulting information to be filtered + based on the Stream ID of the corresponding master. + config ARM_DSU_PMU tristate "ARM DynamIQ Shared Unit (DSU) PMU" depends on ARM64 diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 909f27fd9db3..30489941f3d6 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ARM_CCN) += arm-ccn.o obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o +obj-$(CONFIG_ARM_SMMU_V3_PMU) += arm_smmuv3_pmu.o obj-$(CONFIG_HISI_PMU) += hisilicon/ obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c new file mode 100644 index 000000000000..a6d2e3ce94df --- /dev/null +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -0,0 +1,773 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * This driver adds support for perf events to use the Performance + * Monitor Counter Groups (PMCG) associated with an SMMUv3 node + * to monitor that node. + * + * SMMUv3 PMCG devices are named as smmuv3_pmcg_ where + * is the physical page address of the SMMU PMCG wrapped + * to 4K boundary. For example, the PMCG at 0xff88840000 is named + * smmuv3_pmcg_ff88840 + * + * Filtering by stream id is done by specifying filtering parameters + * with the event. options are: + * filter_enable - 0 = no filtering, 1 = filtering enabled + * filter_span - 0 = exact match, 1 = pattern match + * filter_stream_id - pattern to filter against + * + * To match a partial StreamID where the X most-significant bits must match + * but the Y least-significant bits might differ, STREAMID is programmed + * with a value that contains: + * STREAMID[Y - 1] == 0. + * STREAMID[Y - 2:0] == 1 (where Y > 1). 
+ * The remainder of implemented bits of STREAMID (X bits, from bit Y upwards) + * contain a value to match from the corresponding bits of event StreamID. + * + * Example: perf stat -e smmuv3_pmcg_ff88840/transaction,filter_enable=1, + * filter_span=1,filter_stream_id=0x42/ -a netperf + * Applies filter pattern 0x42 to transaction events, which means events + * matching stream ids 0x42 and 0x43 are counted. Further filtering + * information is available in the SMMU documentation. + * + * SMMU events are not attributable to a CPU, so task mode and sampling + * are not supported. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SMMU_PMCG_EVCNTR0 0x0 +#define SMMU_PMCG_EVCNTR(n, stride) (SMMU_PMCG_EVCNTR0 + (n) * (stride)) +#define SMMU_PMCG_EVTYPER0 0x400 +#define SMMU_PMCG_EVTYPER(n) (SMMU_PMCG_EVTYPER0 + (n) * 4) +#define SMMU_PMCG_SID_SPAN_SHIFT 29 +#define SMMU_PMCG_SMR0 0xA00 +#define SMMU_PMCG_SMR(n) (SMMU_PMCG_SMR0 + (n) * 4) +#define SMMU_PMCG_CNTENSET0 0xC00 +#define SMMU_PMCG_CNTENCLR0 0xC20 +#define SMMU_PMCG_INTENSET0 0xC40 +#define SMMU_PMCG_INTENCLR0 0xC60 +#define SMMU_PMCG_OVSCLR0 0xC80 +#define SMMU_PMCG_OVSSET0 0xCC0 +#define SMMU_PMCG_CFGR 0xE00 +#define SMMU_PMCG_CFGR_SID_FILTER_TYPE BIT(23) +#define SMMU_PMCG_CFGR_RELOC_CTRS BIT(20) +#define SMMU_PMCG_CFGR_SIZE GENMASK(13, 8) +#define SMMU_PMCG_CFGR_NCTR GENMASK(5, 0) +#define SMMU_PMCG_CR 0xE04 +#define SMMU_PMCG_CR_ENABLE BIT(0) +#define SMMU_PMCG_CEID0 0xE20 +#define SMMU_PMCG_CEID1 0xE28 +#define SMMU_PMCG_IRQ_CTRL 0xE50 +#define SMMU_PMCG_IRQ_CTRL_IRQEN BIT(0) +#define SMMU_PMCG_IRQ_CFG0 0xE58 + +#define SMMU_PMCG_DEFAULT_FILTER_SPAN 1 +#define SMMU_PMCG_DEFAULT_FILTER_SID GENMASK(31, 0) + +#define SMMU_PMCG_MAX_COUNTERS 64 +#define SMMU_PMCG_ARCH_MAX_EVENTS 128 + +#define SMMU_PMCG_PA_SHIFT 12 + +static int cpuhp_state_num; + +struct smmu_pmu { + struct hlist_node node; + struct perf_event *events[SMMU_PMCG_MAX_COUNTERS]; + DECLARE_BITMAP(used_counters, SMMU_PMCG_MAX_COUNTERS); + DECLARE_BITMAP(supported_events, SMMU_PMCG_ARCH_MAX_EVENTS); + unsigned int irq; + unsigned int on_cpu; + struct pmu pmu; + unsigned int num_counters; + struct device *dev; + void __iomem *reg_base; + void __iomem *reloc_base; + u64 counter_mask; + bool global_filter; + u32 global_filter_span; + u32 global_filter_sid; +}; + +#define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu)) + +#define SMMU_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end) \ + static inline u32 get_##_name(struct perf_event *event) \ + { \ + return FIELD_GET(GENMASK_ULL(_end, _start), \ + event->attr._config); \ + } \ + +SMMU_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 15); +SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_stream_id, config1, 0, 31); +SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_span, config1, 32, 32); +SMMU_PMU_EVENT_ATTR_EXTRACTOR(filter_enable, config1, 33, 33); + +static inline void smmu_pmu_enable(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + + writel(SMMU_PMCG_IRQ_CTRL_IRQEN, + smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL); + writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR); +} + +static inline void smmu_pmu_disable(struct pmu *pmu) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu); + + writel(0, smmu_pmu->reg_base + SMMU_PMCG_CR); + writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL); +} + +static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu, + u32 idx, u64 value) 
+{ + if (smmu_pmu->counter_mask & BIT(32)) + writeq(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8)); + else + writel(value, smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4)); +} + +static inline u64 smmu_pmu_counter_get_value(struct smmu_pmu *smmu_pmu, u32 idx) +{ + u64 value; + + if (smmu_pmu->counter_mask & BIT(32)) + value = readq(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 8)); + else + value = readl(smmu_pmu->reloc_base + SMMU_PMCG_EVCNTR(idx, 4)); + + return value; +} + +static inline void smmu_pmu_counter_enable(struct smmu_pmu *smmu_pmu, u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENSET0); +} + +static inline void smmu_pmu_counter_disable(struct smmu_pmu *smmu_pmu, u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0); +} + +static inline void smmu_pmu_interrupt_enable(struct smmu_pmu *smmu_pmu, u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENSET0); +} + +static inline void smmu_pmu_interrupt_disable(struct smmu_pmu *smmu_pmu, + u32 idx) +{ + writeq(BIT(idx), smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0); +} + +static inline void smmu_pmu_set_evtyper(struct smmu_pmu *smmu_pmu, u32 idx, + u32 val) +{ + writel(val, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx)); +} + +static inline void smmu_pmu_set_smr(struct smmu_pmu *smmu_pmu, u32 idx, u32 val) +{ + writel(val, smmu_pmu->reg_base + SMMU_PMCG_SMR(idx)); +} + +static void smmu_pmu_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + u64 delta, prev, now; + u32 idx = hwc->idx; + + do { + prev = local64_read(&hwc->prev_count); + now = smmu_pmu_counter_get_value(smmu_pmu, idx); + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); + + /* handle overflow. */ + delta = now - prev; + delta &= smmu_pmu->counter_mask; + + local64_add(delta, &event->count); +} + +static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u64 new; + + /* + * We limit the max period to half the max counter value of the counter + * size, so that even in the case of extreme interrupt latency the + * counter will (hopefully) not wrap past its initial value. + */ + new = smmu_pmu->counter_mask >> 1; + + local64_set(&hwc->prev_count, new); + smmu_pmu_counter_set_value(smmu_pmu, idx, new); +} + +static void smmu_pmu_set_event_filter(struct perf_event *event, + int idx, u32 span, u32 sid) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + u32 evtyper; + + evtyper = get_event(event) | span << SMMU_PMCG_SID_SPAN_SHIFT; + smmu_pmu_set_evtyper(smmu_pmu, idx, evtyper); + smmu_pmu_set_smr(smmu_pmu, idx, sid); +} + +static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu, + struct perf_event *event, int idx) +{ + u32 span, sid; + unsigned int num_ctrs = smmu_pmu->num_counters; + bool filter_en = !!get_filter_enable(event); + + span = filter_en ? get_filter_span(event) : + SMMU_PMCG_DEFAULT_FILTER_SPAN; + sid = filter_en ? 
get_filter_stream_id(event) : + SMMU_PMCG_DEFAULT_FILTER_SID; + + /* Support individual filter settings */ + if (!smmu_pmu->global_filter) { + smmu_pmu_set_event_filter(event, idx, span, sid); + return 0; + } + + /* Requested settings same as current global settings*/ + if (span == smmu_pmu->global_filter_span && + sid == smmu_pmu->global_filter_sid) + return 0; + + if (!bitmap_empty(smmu_pmu->used_counters, num_ctrs)) + return -EAGAIN; + + smmu_pmu_set_event_filter(event, 0, span, sid); + smmu_pmu->global_filter_span = span; + smmu_pmu->global_filter_sid = sid; + return 0; +} + +static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu, + struct perf_event *event) +{ + int idx, err; + unsigned int num_ctrs = smmu_pmu->num_counters; + + idx = find_first_zero_bit(smmu_pmu->used_counters, num_ctrs); + if (idx == num_ctrs) + /* The counters are all in use. */ + return -EAGAIN; + + err = smmu_pmu_apply_event_filter(smmu_pmu, event, idx); + if (err) + return err; + + set_bit(idx, smmu_pmu->used_counters); + + return idx; +} + +/* + * Implementation of abstract pmu functionality required by + * the core perf events code. + */ + +static int smmu_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + struct device *dev = smmu_pmu->dev; + struct perf_event *sibling; + u16 event_id; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (hwc->sample_period) { + dev_dbg(dev, "Sampling not supported\n"); + return -EOPNOTSUPP; + } + + if (event->cpu < 0) { + dev_dbg(dev, "Per-task mode not supported\n"); + return -EOPNOTSUPP; + } + + /* Verify specified event is supported on this PMU */ + event_id = get_event(event); + if (event_id < SMMU_PMCG_ARCH_MAX_EVENTS && + (!test_bit(event_id, smmu_pmu->supported_events))) { + dev_dbg(dev, "Invalid event %d for this PMU\n", event_id); + return -EINVAL; + } + + /* Don't allow groups with mixed PMUs, except for s/w events */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) { + dev_dbg(dev, "Can't create mixed PMU group\n"); + return -EINVAL; + } + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) { + dev_dbg(dev, "Can't create mixed PMU group\n"); + return -EINVAL; + } + } + + hwc->idx = -1; + + /* + * Ensure all events are on the same cpu so all events are in the + * same cpu context, to avoid races on pmu_enable etc. 
+ */ + event->cpu = smmu_pmu->on_cpu; + + return 0; +} + +static void smmu_pmu_event_start(struct perf_event *event, int flags) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + hwc->state = 0; + + smmu_pmu_set_period(smmu_pmu, hwc); + + smmu_pmu_counter_enable(smmu_pmu, idx); +} + +static void smmu_pmu_event_stop(struct perf_event *event, int flags) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (hwc->state & PERF_HES_STOPPED) + return; + + smmu_pmu_counter_disable(smmu_pmu, idx); + /* As the counter gets updated on _start, ignore PERF_EF_UPDATE */ + smmu_pmu_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int smmu_pmu_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int idx; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + + idx = smmu_pmu_get_event_idx(smmu_pmu, event); + if (idx < 0) + return idx; + + hwc->idx = idx; + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + smmu_pmu->events[idx] = event; + local64_set(&hwc->prev_count, 0); + + smmu_pmu_interrupt_enable(smmu_pmu, idx); + + if (flags & PERF_EF_START) + smmu_pmu_event_start(event, flags); + + /* Propagate changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return 0; +} + +static void smmu_pmu_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); + int idx = hwc->idx; + + smmu_pmu_event_stop(event, flags | PERF_EF_UPDATE); + smmu_pmu_interrupt_disable(smmu_pmu, idx); + smmu_pmu->events[idx] = NULL; + clear_bit(idx, smmu_pmu->used_counters); + + perf_event_update_userpage(event); +} + +static void smmu_pmu_event_read(struct perf_event *event) +{ + smmu_pmu_event_update(event); +} + +/* cpumask */ + +static ssize_t smmu_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(smmu_pmu->on_cpu)); +} + +static struct device_attribute smmu_pmu_cpumask_attr = + __ATTR(cpumask, 0444, smmu_pmu_cpumask_show, NULL); + +static struct attribute *smmu_pmu_cpumask_attrs[] = { + &smmu_pmu_cpumask_attr.attr, + NULL +}; + +static struct attribute_group smmu_pmu_cpumask_group = { + .attrs = smmu_pmu_cpumask_attrs, +}; + +/* Events */ + +static ssize_t smmu_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define SMMU_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR(name, smmu_event_attr_##name, \ + config, smmu_pmu_event_show) +SMMU_EVENT_ATTR(cycles, 0); +SMMU_EVENT_ATTR(transaction, 1); +SMMU_EVENT_ATTR(tlb_miss, 2); +SMMU_EVENT_ATTR(config_cache_miss, 3); +SMMU_EVENT_ATTR(trans_table_walk_access, 4); +SMMU_EVENT_ATTR(config_struct_access, 5); +SMMU_EVENT_ATTR(pcie_ats_trans_rq, 6); +SMMU_EVENT_ATTR(pcie_ats_trans_passed, 7); + +static struct attribute *smmu_pmu_events[] = { + &smmu_event_attr_cycles.attr.attr, + &smmu_event_attr_transaction.attr.attr, + &smmu_event_attr_tlb_miss.attr.attr, + &smmu_event_attr_config_cache_miss.attr.attr, + &smmu_event_attr_trans_table_walk_access.attr.attr, + 
&smmu_event_attr_config_struct_access.attr.attr, + &smmu_event_attr_pcie_ats_trans_rq.attr.attr, + &smmu_event_attr_pcie_ats_trans_passed.attr.attr, + NULL +}; + +static umode_t smmu_pmu_event_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct smmu_pmu *smmu_pmu = to_smmu_pmu(dev_get_drvdata(dev)); + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr); + + if (test_bit(pmu_attr->id, smmu_pmu->supported_events)) + return attr->mode; + + return 0; +} + +static struct attribute_group smmu_pmu_events_group = { + .name = "events", + .attrs = smmu_pmu_events, + .is_visible = smmu_pmu_event_is_visible, +}; + +/* Formats */ +PMU_FORMAT_ATTR(event, "config:0-15"); +PMU_FORMAT_ATTR(filter_stream_id, "config1:0-31"); +PMU_FORMAT_ATTR(filter_span, "config1:32"); +PMU_FORMAT_ATTR(filter_enable, "config1:33"); + +static struct attribute *smmu_pmu_formats[] = { + &format_attr_event.attr, + &format_attr_filter_stream_id.attr, + &format_attr_filter_span.attr, + &format_attr_filter_enable.attr, + NULL +}; + +static struct attribute_group smmu_pmu_format_group = { + .name = "format", + .attrs = smmu_pmu_formats, +}; + +static const struct attribute_group *smmu_pmu_attr_grps[] = { + &smmu_pmu_cpumask_group, + &smmu_pmu_events_group, + &smmu_pmu_format_group, + NULL +}; + +/* + * Generic device handlers + */ + +static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct smmu_pmu *smmu_pmu; + unsigned int target; + + smmu_pmu = hlist_entry_safe(node, struct smmu_pmu, node); + if (cpu != smmu_pmu->on_cpu) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target); + smmu_pmu->on_cpu = target; + WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target))); + + return 0; +} + +static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) +{ + struct smmu_pmu *smmu_pmu = data; + u64 ovsr; + unsigned int idx; + + ovsr = readq(smmu_pmu->reloc_base + SMMU_PMCG_OVSSET0); + if (!ovsr) + return IRQ_NONE; + + writeq(ovsr, smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); + + for_each_set_bit(idx, (unsigned long *)&ovsr, smmu_pmu->num_counters) { + struct perf_event *event = smmu_pmu->events[idx]; + struct hw_perf_event *hwc; + + if (WARN_ON_ONCE(!event)) + continue; + + smmu_pmu_event_update(event); + hwc = &event->hw; + + smmu_pmu_set_period(smmu_pmu, hwc); + } + + return IRQ_HANDLED; +} + +static int smmu_pmu_setup_irq(struct smmu_pmu *pmu) +{ + unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD; + int irq, ret = -ENXIO; + + irq = pmu->irq; + if (irq) + ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq, + flags, "smmuv3-pmu", pmu); + return ret; +} + +static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu) +{ + u64 counter_present_mask = GENMASK_ULL(smmu_pmu->num_counters - 1, 0); + + smmu_pmu_disable(&smmu_pmu->pmu); + + /* Disable counter and interrupt */ + writeq_relaxed(counter_present_mask, + smmu_pmu->reg_base + SMMU_PMCG_CNTENCLR0); + writeq_relaxed(counter_present_mask, + smmu_pmu->reg_base + SMMU_PMCG_INTENCLR0); + writeq_relaxed(counter_present_mask, + smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); +} + +static int smmu_pmu_probe(struct platform_device *pdev) +{ + struct smmu_pmu *smmu_pmu; + struct resource *res_0, *res_1; + u32 cfgr, reg_size; + u64 ceid_64[2]; + int irq, err; + char *name; + struct device *dev = &pdev->dev; + 
+ smmu_pmu = devm_kzalloc(dev, sizeof(*smmu_pmu), GFP_KERNEL); + if (!smmu_pmu) + return -ENOMEM; + + smmu_pmu->dev = dev; + platform_set_drvdata(pdev, smmu_pmu); + + smmu_pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .pmu_enable = smmu_pmu_enable, + .pmu_disable = smmu_pmu_disable, + .event_init = smmu_pmu_event_init, + .add = smmu_pmu_event_add, + .del = smmu_pmu_event_del, + .start = smmu_pmu_event_start, + .stop = smmu_pmu_event_stop, + .read = smmu_pmu_event_read, + .attr_groups = smmu_pmu_attr_grps, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + res_0 = platform_get_resource(pdev, IORESOURCE_MEM, 0); + smmu_pmu->reg_base = devm_ioremap_resource(dev, res_0); + if (IS_ERR(smmu_pmu->reg_base)) + return PTR_ERR(smmu_pmu->reg_base); + + cfgr = readl_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CFGR); + + /* Determine if page 1 is present */ + if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) { + res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); + smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1); + if (IS_ERR(smmu_pmu->reloc_base)) + return PTR_ERR(smmu_pmu->reloc_base); + } else { + smmu_pmu->reloc_base = smmu_pmu->reg_base; + } + + irq = platform_get_irq(pdev, 0); + if (irq > 0) + smmu_pmu->irq = irq; + + ceid_64[0] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID0); + ceid_64[1] = readq_relaxed(smmu_pmu->reg_base + SMMU_PMCG_CEID1); + bitmap_from_arr32(smmu_pmu->supported_events, (u32 *)ceid_64, + SMMU_PMCG_ARCH_MAX_EVENTS); + + smmu_pmu->num_counters = FIELD_GET(SMMU_PMCG_CFGR_NCTR, cfgr) + 1; + + smmu_pmu->global_filter = !!(cfgr & SMMU_PMCG_CFGR_SID_FILTER_TYPE); + + reg_size = FIELD_GET(SMMU_PMCG_CFGR_SIZE, cfgr); + smmu_pmu->counter_mask = GENMASK_ULL(reg_size, 0); + + smmu_pmu_reset(smmu_pmu); + + err = smmu_pmu_setup_irq(smmu_pmu); + if (err) { + dev_err(dev, "Setup irq failed, PMU @%pa\n", &res_0->start); + return err; + } + + name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "smmuv3_pmcg_%llx", + (res_0->start) >> SMMU_PMCG_PA_SHIFT); + if (!name) { + dev_err(dev, "Create name failed, PMU @%pa\n", &res_0->start); + return -EINVAL; + } + + /* Pick one CPU to be the preferred one to use */ + smmu_pmu->on_cpu = raw_smp_processor_id(); + WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, + cpumask_of(smmu_pmu->on_cpu))); + + err = cpuhp_state_add_instance_nocalls(cpuhp_state_num, + &smmu_pmu->node); + if (err) { + dev_err(dev, "Error %d registering hotplug, PMU @%pa\n", + err, &res_0->start); + goto out_cpuhp_err; + } + + err = perf_pmu_register(&smmu_pmu->pmu, name, -1); + if (err) { + dev_err(dev, "Error %d registering PMU @%pa\n", + err, &res_0->start); + goto out_unregister; + } + + dev_info(dev, "Registered PMU @ %pa using %d counters with %s filter settings\n", + &res_0->start, smmu_pmu->num_counters, + smmu_pmu->global_filter ? 
"Global(Counter0)" : + "Individual"); + + return 0; + +out_unregister: + cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); +out_cpuhp_err: + put_cpu(); + return err; +} + +static int smmu_pmu_remove(struct platform_device *pdev) +{ + struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev); + + perf_pmu_unregister(&smmu_pmu->pmu); + cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node); + + return 0; +} + +static void smmu_pmu_shutdown(struct platform_device *pdev) +{ + struct smmu_pmu *smmu_pmu = platform_get_drvdata(pdev); + + smmu_pmu_disable(&smmu_pmu->pmu); +} + +static struct platform_driver smmu_pmu_driver = { + .driver = { + .name = "arm-smmu-v3-pmcg", + }, + .probe = smmu_pmu_probe, + .remove = smmu_pmu_remove, + .shutdown = smmu_pmu_shutdown, +}; + +static int __init arm_smmu_pmu_init(void) +{ + cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "perf/arm/pmcg:online", + NULL, + smmu_pmu_offline_cpu); + if (cpuhp_state_num < 0) + return cpuhp_state_num; + + return platform_driver_register(&smmu_pmu_driver); +} +module_init(arm_smmu_pmu_init); + +static void __exit arm_smmu_pmu_exit(void) +{ + platform_driver_unregister(&smmu_pmu_driver); + cpuhp_remove_multi_state(cpuhp_state_num); +} + +module_exit(arm_smmu_pmu_exit); + +MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension"); +MODULE_AUTHOR("Neil Leeder "); +MODULE_AUTHOR("Shameer Kolothum "); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-59-g8ed1b From f202cdab3b48d8c2c1846c938ea69cb8aa897699 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 26 Mar 2019 15:17:52 +0000 Subject: perf/smmuv3: Add MSI irq support This adds support for MSI-based counter overflow interrupt. Signed-off-by: Shameer Kolothum Reviewed-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 58 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index a6d2e3ce94df..a4f4b488a2de 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -67,6 +67,7 @@ #define SMMU_PMCG_OVSSET0 0xCC0 #define SMMU_PMCG_CFGR 0xE00 #define SMMU_PMCG_CFGR_SID_FILTER_TYPE BIT(23) +#define SMMU_PMCG_CFGR_MSI BIT(21) #define SMMU_PMCG_CFGR_RELOC_CTRS BIT(20) #define SMMU_PMCG_CFGR_SIZE GENMASK(13, 8) #define SMMU_PMCG_CFGR_NCTR GENMASK(5, 0) @@ -77,6 +78,12 @@ #define SMMU_PMCG_IRQ_CTRL 0xE50 #define SMMU_PMCG_IRQ_CTRL_IRQEN BIT(0) #define SMMU_PMCG_IRQ_CFG0 0xE58 +#define SMMU_PMCG_IRQ_CFG1 0xE60 +#define SMMU_PMCG_IRQ_CFG2 0xE64 + +/* MSI config fields */ +#define MSI_CFG0_ADDR_MASK GENMASK_ULL(51, 2) +#define MSI_CFG2_MEMATTR_DEVICE_nGnRE 0x1 #define SMMU_PMCG_DEFAULT_FILTER_SPAN 1 #define SMMU_PMCG_DEFAULT_FILTER_SID GENMASK(31, 0) @@ -580,11 +587,62 @@ static irqreturn_t smmu_pmu_handle_irq(int irq_num, void *data) return IRQ_HANDLED; } +static void smmu_pmu_free_msis(void *data) +{ + struct device *dev = data; + + platform_msi_domain_free_irqs(dev); +} + +static void smmu_pmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ + phys_addr_t doorbell; + struct device *dev = msi_desc_to_dev(desc); + struct smmu_pmu *pmu = dev_get_drvdata(dev); + + doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo; + doorbell &= MSI_CFG0_ADDR_MASK; + + writeq_relaxed(doorbell, pmu->reg_base + SMMU_PMCG_IRQ_CFG0); + writel_relaxed(msg->data, pmu->reg_base + SMMU_PMCG_IRQ_CFG1); + writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, + pmu->reg_base + 
SMMU_PMCG_IRQ_CFG2); +} + +static void smmu_pmu_setup_msi(struct smmu_pmu *pmu) +{ + struct msi_desc *desc; + struct device *dev = pmu->dev; + int ret; + + /* Clear MSI address reg */ + writeq_relaxed(0, pmu->reg_base + SMMU_PMCG_IRQ_CFG0); + + /* MSI supported or not */ + if (!(readl(pmu->reg_base + SMMU_PMCG_CFGR) & SMMU_PMCG_CFGR_MSI)) + return; + + ret = platform_msi_domain_alloc_irqs(dev, 1, smmu_pmu_write_msi_msg); + if (ret) { + dev_warn(dev, "failed to allocate MSIs\n"); + return; + } + + desc = first_msi_entry(dev); + if (desc) + pmu->irq = desc->irq; + + /* Add callback to free MSIs on teardown */ + devm_add_action(dev, smmu_pmu_free_msis, dev); +} + static int smmu_pmu_setup_irq(struct smmu_pmu *pmu) { unsigned long flags = IRQF_NOBALANCING | IRQF_SHARED | IRQF_NO_THREAD; int irq, ret = -ENXIO; + smmu_pmu_setup_msi(pmu); + irq = pmu->irq; if (irq) ret = devm_request_irq(pmu->dev, irq, smmu_pmu_handle_irq, -- cgit v1.2.3-59-g8ed1b From 24062fe85860debfdae0eeaa495f27c9971ec163 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Tue, 26 Mar 2019 15:17:53 +0000 Subject: perf/smmuv3: Enable HiSilicon Erratum 162001800 quirk HiSilicon erratum 162001800 describes the limitation of SMMUv3 PMCG implementation on HiSilicon Hip08 platforms. On these platforms, the PMCG event counter registers (SMMU_PMCG_EVCNTRn) are read only and as a result it is not possible to set the initial counter period value on event monitor start. To work around this, the current value of the counter is read and used for delta calculations. OEM information from ACPI header is used to identify the affected hardware platforms. Signed-off-by: Shameer Kolothum Reviewed-by: Hanjun Guo Reviewed-by: Robin Murphy Acked-by: Lorenzo Pieralisi [will: update silicon-errata.txt and add reason string to acpi match] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + drivers/acpi/arm64/iort.c | 16 +++++++++++- drivers/perf/arm_smmuv3_pmu.c | 48 +++++++++++++++++++++++++++++----- include/linux/acpi_iort.h | 1 + 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index d1e2bb801e1b..c00efb639e46 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -77,6 +77,7 @@ stable kernels. | Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | | Hisilicon | Hip0{6,7} | #161010701 | N/A | | Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 | +| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | | | | | | | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. 
| Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e2c9b26bbee6..2d70b349bd6c 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1366,9 +1366,23 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, ACPI_EDGE_SENSITIVE, &res[2]); } +static struct acpi_platform_list pmcg_plat_info[] __initdata = { + /* HiSilicon Hip08 Platform */ + {"HISI ", "HIP08 ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08}, + { } +}; + static int __init arm_smmu_v3_pmcg_add_platdata(struct platform_device *pdev) { - u32 model = IORT_SMMU_V3_PMCG_GENERIC; + u32 model; + int idx; + + idx = acpi_match_platform_list(pmcg_plat_info); + if (idx >= 0) + model = pmcg_plat_info[idx].data; + else + model = IORT_SMMU_V3_PMCG_GENERIC; return platform_device_add_data(pdev, &model, sizeof(model)); } diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index a4f4b488a2de..da71c741cb46 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -35,6 +35,7 @@ */ #include +#include #include #include #include @@ -93,6 +94,8 @@ #define SMMU_PMCG_PA_SHIFT 12 +#define SMMU_PMCG_EVCNTR_RDONLY BIT(0) + static int cpuhp_state_num; struct smmu_pmu { @@ -108,6 +111,7 @@ struct smmu_pmu { void __iomem *reg_base; void __iomem *reloc_base; u64 counter_mask; + u32 options; bool global_filter; u32 global_filter_span; u32 global_filter_sid; @@ -222,15 +226,27 @@ static void smmu_pmu_set_period(struct smmu_pmu *smmu_pmu, u32 idx = hwc->idx; u64 new; - /* - * We limit the max period to half the max counter value of the counter - * size, so that even in the case of extreme interrupt latency the - * counter will (hopefully) not wrap past its initial value. - */ - new = smmu_pmu->counter_mask >> 1; + if (smmu_pmu->options & SMMU_PMCG_EVCNTR_RDONLY) { + /* + * On platforms that require this quirk, if the counter starts + * at < half_counter value and wraps, the current logic of + * handling the overflow may not work. It is expected that, + * those platforms will have full 64 counter bits implemented + * so that such a possibility is remote(eg: HiSilicon HIP08). + */ + new = smmu_pmu_counter_get_value(smmu_pmu, idx); + } else { + /* + * We limit the max period to half the max counter value + * of the counter size, so that even in the case of extreme + * interrupt latency the counter will (hopefully) not wrap + * past its initial value. 
+ */ + new = smmu_pmu->counter_mask >> 1; + smmu_pmu_counter_set_value(smmu_pmu, idx, new); + } local64_set(&hwc->prev_count, new); - smmu_pmu_counter_set_value(smmu_pmu, idx, new); } static void smmu_pmu_set_event_filter(struct perf_event *event, @@ -665,6 +681,22 @@ static void smmu_pmu_reset(struct smmu_pmu *smmu_pmu) smmu_pmu->reloc_base + SMMU_PMCG_OVSCLR0); } +static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu) +{ + u32 model; + + model = *(u32 *)dev_get_platdata(smmu_pmu->dev); + + switch (model) { + case IORT_SMMU_V3_PMCG_HISI_HIP08: + /* HiSilicon Erratum 162001800 */ + smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY; + break; + } + + dev_notice(smmu_pmu->dev, "option mask 0x%x\n", smmu_pmu->options); +} + static int smmu_pmu_probe(struct platform_device *pdev) { struct smmu_pmu *smmu_pmu; @@ -744,6 +776,8 @@ static int smmu_pmu_probe(struct platform_device *pdev) return -EINVAL; } + smmu_pmu_get_acpi_options(smmu_pmu); + /* Pick one CPU to be the preferred one to use */ smmu_pmu->on_cpu = raw_smp_processor_id(); WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 052ef7b9f985..723e4dfa1c14 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -32,6 +32,7 @@ * do with hardware or with IORT specification. */ #define IORT_SMMU_V3_PMCG_GENERIC 0x00000000 /* Generic SMMUv3 PMCG */ +#define IORT_SMMU_V3_PMCG_HISI_HIP08 0x00000001 /* HiSilicon HIP08 PMCG */ int iort_register_domain_token(int trans_id, phys_addr_t base, struct fwnode_handle *fw_node); -- cgit v1.2.3-59-g8ed1b From f6e564354a01dd943ba92c514ddf386080d7baa4 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Fri, 5 Apr 2019 11:20:12 +0100 Subject: arm64: Use defines instead of magic numbers Following assembly code is not trivial; make it slightly easier to read by replacing some of the magic numbers with the defines which are already present in sysreg.h. Reviewed-by: Dave Martin Signed-off-by: Alexandru Elisei Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 4 ++-- arch/arm64/kernel/head.S | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index c5308d01e228..621212196871 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -442,8 +442,8 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present */ .macro reset_pmuserenr_el0, tmpreg - mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx \tmpreg, \tmpreg, #8, #4 + mrs \tmpreg, id_aa64dfr0_el1 + sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp \tmpreg, #1 // Skip if no PMU present b.lt 9000f msr pmuserenr_el0, xzr // Disable PMU access from EL0 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index eecf7927dab0..f533305849d7 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -505,7 +505,7 @@ ENTRY(el2_setup) * kernel is intended to run at EL2. 
*/ mrs x2, id_aa64mmfr1_el1 - ubfx x2, x2, #8, #4 + ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else mov x2, xzr #endif @@ -538,7 +538,7 @@ set_hcr: #ifdef CONFIG_ARM_GIC_V3 /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #24, #4 + ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 cbz x0, 3f mrs_s x0, SYS_ICC_SRE_EL2 @@ -564,8 +564,8 @@ set_hcr: #endif /* EL2 debug */ - mrs x1, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx x0, x1, #8, #4 + mrs x1, id_aa64dfr0_el1 + sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp x0, #1 b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps @@ -574,7 +574,7 @@ set_hcr: csel x3, xzr, x0, lt // all PMU counters from EL1 /* Statistical profiling */ - ubfx x0, x1, #32, #4 // Check ID_AA64DFR0_EL1 PMSVer + ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cbz x0, 7f // Skip if SPE not present cbnz x2, 6f // VHE? mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, -- cgit v1.2.3-59-g8ed1b From edf072d36dbfdf74465b66988f30084b6c996fbf Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:07 +0100 Subject: arm64: Makefile: Replace -pg with CC_FLAGS_FTRACE In preparation for arm64 supporting ftrace built on other compiler options, let's have the arm64 Makefiles remove the $(CC_FLAGS_FTRACE) flags, whatever these may be, rather than assuming '-pg'. There should be no functional change as a result of this patch. Reviewed-by: Mark Rutland Signed-off-by: Torsten Duwe Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 6 +++--- arch/arm64/lib/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index cd434d0719c1..a30bbecb23d8 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -7,9 +7,9 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) CFLAGS_armv8_deprecated.o := -I$(src) -CFLAGS_REMOVE_ftrace.o = -pg -CFLAGS_REMOVE_insn.o = -pg -CFLAGS_REMOVE_return_address.o = -pg +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_insn.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 5540a1638baf..33c2a4abda04 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -24,7 +24,7 @@ CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ -fcall-saved-x18 -fomit-frame-pointer -CFLAGS_REMOVE_atomic_ll_sc.o := -pg +CFLAGS_REMOVE_atomic_ll_sc.o := $(CC_FLAGS_FTRACE) GCOV_PROFILE_atomic_ll_sc.o := n KASAN_SANITIZE_atomic_ll_sc.o := n KCOV_INSTRUMENT_atomic_ll_sc.o := n -- cgit v1.2.3-59-g8ed1b From e1a7eafb7350ff118e7538aca76b3f79e9280a8f Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:11 +0100 Subject: efi/arm/arm64: Makefile: Replace -pg with CC_FLAGS_FTRACE In preparation for arm64 supporting ftrace built on other compiler options, let's have Makefiles remove the $(CC_FLAGS_FTRACE) flags, whatever these maybe, rather than assuming '-pg'. While at it, fix arm32 as well. There should be no functional change as a result of this patch. 
Reviewed-by: Mark Rutland Acked-by: Ard Biesheuvel Signed-off-by: Torsten Duwe Signed-off-by: Will Deacon --- drivers/firmware/efi/libstub/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index b0103e16fc1b..645269c3906d 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -16,9 +16,9 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ -O2 \ # arm64 uses the full KBUILD_CFLAGS so it's necessary to explicitly # disable the stackleak plugin -cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -fpie \ - $(DISABLE_STACKLEAK_PLUGIN) -cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ +cflags-$(CONFIG_ARM64) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ + -fpie $(DISABLE_STACKLEAK_PLUGIN) +cflags-$(CONFIG_ARM) := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \ -fno-builtin -fpic \ $(call cc-option,-mno-single-pic-base) -- cgit v1.2.3-59-g8ed1b From e2092740b72384e95b9c8a418536b35d628a1642 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:14 +0100 Subject: kasan: Makefile: Replace -pg with CC_FLAGS_FTRACE In preparation for arm64 supporting ftrace built on other compiler options, let's have Makefiles remove the $(CC_FLAGS_FTRACE) flags, whatever these may be, rather than assuming '-pg'. There should be no functional change as a result of this patch. Reviewed-by: Mark Rutland Acked-by: Andrey Ryabinin Signed-off-by: Torsten Duwe Signed-off-by: Will Deacon --- mm/kasan/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 5d1065efbd47..f06ee820d356 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -5,9 +5,9 @@ UBSAN_SANITIZE_generic.o := n UBSAN_SANITIZE_tags.o := n KCOV_INSTRUMENT := n -CFLAGS_REMOVE_common.o = -pg -CFLAGS_REMOVE_generic.o = -pg -CFLAGS_REMOVE_tags.o = -pg +CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_generic.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_tags.o = $(CC_FLAGS_FTRACE) # Function splitter causes unnecessary splits in __asan_load1/__asan_store1 # see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 -- cgit v1.2.3-59-g8ed1b From 52c6d145da15a9a9cea14678be307c127ccc6ef5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Feb 2019 12:06:43 +0000 Subject: arm64: debug: Remove unused return value from do_debug_exception() do_debug_exception() goes out of its way to return a value that isn't ever used, so just make the thing void. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4f343e603925..0cb0e09995e1 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -810,13 +810,12 @@ void __init hook_debug_fault_code(int nr, debug_fault_info[nr].name = name; } -asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, + unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); - int rv; /* * Tell lockdep we disabled irqs in entry.S. 
Do nothing if they were @@ -828,17 +827,12 @@ asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint, if (user_mode(regs) && !is_ttbr0_addr(pc)) arm64_apply_bp_hardening(); - if (!inf->fn(addr_if_watchpoint, esr, regs)) { - rv = 1; - } else { + if (inf->fn(addr_if_watchpoint, esr, regs)) { arm64_notify_die(inf->name, regs, inf->sig, inf->code, (void __user *)pc, esr); - rv = 0; } if (interrupts_enabled(regs)) trace_hardirqs_on(); - - return rv; } NOKPROBE_SYMBOL(do_debug_exception); -- cgit v1.2.3-59-g8ed1b From 5a9132add862f446d5f0d7fa7468887108b5898a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Feb 2019 12:42:26 +0000 Subject: arm64: debug: Rename addr parameter for non-watchpoint exception hooks Since the 'addr' parameter contains an UNKNOWN value for non-watchpoint debug exceptions, rename it to 'unused' for those hooks so we don't get tempted to use it in the future. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index d7bb6aefae0a..c4c263d0cf0f 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -222,7 +222,7 @@ static void send_user_sigtrap(int si_code) "User debug trap"); } -static int single_step_handler(unsigned long addr, unsigned int esr, +static int single_step_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs) { bool handler_found = false; @@ -302,7 +302,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) } NOKPROBE_SYMBOL(call_break_hook); -static int brk_handler(unsigned long addr, unsigned int esr, +static int brk_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs) { bool handler_found = false; -- cgit v1.2.3-59-g8ed1b From cb764a69fa41179fb222b53b1a33a9d7373f9249 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Feb 2019 13:22:17 +0000 Subject: arm64: debug: Remove meaningless comment The comment next to the definition of our 'break_hook' list head is at best wrong but mainly just meaningless. Rip it out. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index c4c263d0cf0f..744229d10ca8 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -264,11 +264,6 @@ static int single_step_handler(unsigned long unused, unsigned int esr, } NOKPROBE_SYMBOL(single_step_handler); -/* - * Breakpoint handler is re-entrant as another breakpoint can - * hit within breakpoint handler, especically in kprobes. - * Use reader/writer locks instead of plain spinlock. - */ static LIST_HEAD(break_hook); static DEFINE_SPINLOCK(break_hook_lock); -- cgit v1.2.3-59-g8ed1b From 26a04d84bc5311d7785b229b353f327e866ab61a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 12:52:47 +0000 Subject: arm64: debug: Separate debug hooks based on target exception level Mixing kernel and user debug hooks together is highly error-prone as it relies on all of the hooks to figure out whether the exception came from kernel or user, and then to act accordingly. Make our debug hook code a little more robust by maintaining separate hook lists for user and kernel, with separate registration functions to force callers to be explicit about the exception levels that they care about. 
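As an illustration of the new interface (not part of this patch; the handler name and the immediate below are made up), a kernel-side client now has to pick the EL1 list explicitly when registering:

	#include <asm/debug-monitors.h>

	static int example_brk_handler(struct pt_regs *regs, unsigned int esr)
	{
		/* Consume the kernel-mode BRK so it is not escalated further. */
		return DBG_HOOK_HANDLED;
	}

	static struct break_hook example_break_hook = {
		.fn	= example_brk_handler,
		.imm	= 0x006,	/* made-up immediate, for the example only */
	};

	static int __init example_debug_init(void)
	{
		/* Only BRK exceptions taken from EL1 are dispatched to this hook. */
		register_kernel_break_hook(&example_break_hook);
		return 0;
	}

User-mode clients go through register_user_break_hook() instead, so a hook author can no longer forget to check which exception level triggered the BRK.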
Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/brk-imm.h | 1 + arch/arm64/include/asm/debug-monitors.h | 18 ++++--- arch/arm64/kernel/debug-monitors.c | 85 +++++++++++++++++++++++---------- arch/arm64/kernel/kgdb.c | 22 ++++----- arch/arm64/kernel/probes/uprobes.c | 7 ++- arch/arm64/kernel/traps.c | 20 ++++---- 6 files changed, 95 insertions(+), 58 deletions(-) diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index 2945fe6cd863..fec9e1384641 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -23,5 +23,6 @@ #define KGDB_COMPILED_DBG_BRK_IMM 0x401 #define BUG_BRK_IMM 0x800 #define KASAN_BRK_IMM 0x900 +#define KASAN_BRK_MASK 0x0ff #endif diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index a44cf5225429..7d37cfa5cc16 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -94,18 +94,24 @@ struct step_hook { int (*fn)(struct pt_regs *regs, unsigned int esr); }; -void register_step_hook(struct step_hook *hook); -void unregister_step_hook(struct step_hook *hook); +void register_user_step_hook(struct step_hook *hook); +void unregister_user_step_hook(struct step_hook *hook); + +void register_kernel_step_hook(struct step_hook *hook); +void unregister_kernel_step_hook(struct step_hook *hook); struct break_hook { struct list_head node; - u32 esr_val; - u32 esr_mask; int (*fn)(struct pt_regs *regs, unsigned int esr); + u16 imm; + u16 mask; /* These bits are ignored when comparing with imm */ }; -void register_break_hook(struct break_hook *hook); -void unregister_break_hook(struct break_hook *hook); +void register_user_break_hook(struct break_hook *hook); +void unregister_user_break_hook(struct break_hook *hook); + +void register_kernel_break_hook(struct break_hook *hook); +void unregister_kernel_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 744229d10ca8..9b3fd7fa5b43 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -163,25 +163,46 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) } NOKPROBE_SYMBOL(clear_regs_spsr_ss); -/* EL1 Single Step Handler hooks */ -static LIST_HEAD(step_hook); -static DEFINE_SPINLOCK(step_hook_lock); +static DEFINE_SPINLOCK(debug_hook_lock); +static LIST_HEAD(user_step_hook); +static LIST_HEAD(kernel_step_hook); -void register_step_hook(struct step_hook *hook) +static void register_debug_hook(struct list_head *node, struct list_head *list) { - spin_lock(&step_hook_lock); - list_add_rcu(&hook->node, &step_hook); - spin_unlock(&step_hook_lock); + spin_lock(&debug_hook_lock); + list_add_rcu(node, list); + spin_unlock(&debug_hook_lock); + } -void unregister_step_hook(struct step_hook *hook) +static void unregister_debug_hook(struct list_head *node) { - spin_lock(&step_hook_lock); - list_del_rcu(&hook->node); - spin_unlock(&step_hook_lock); + spin_lock(&debug_hook_lock); + list_del_rcu(node); + spin_unlock(&debug_hook_lock); synchronize_rcu(); } +void register_user_step_hook(struct step_hook *hook) +{ + register_debug_hook(&hook->node, &user_step_hook); +} + +void unregister_user_step_hook(struct step_hook *hook) +{ + unregister_debug_hook(&hook->node); +} + +void register_kernel_step_hook(struct step_hook *hook) +{ + register_debug_hook(&hook->node, &kernel_step_hook); +} + +void unregister_kernel_step_hook(struct step_hook *hook) 
+{ + unregister_debug_hook(&hook->node); +} + /* * Call registered single step handlers * There is no Syndrome info to check for determining the handler. @@ -191,11 +212,14 @@ void unregister_step_hook(struct step_hook *hook) static int call_step_hook(struct pt_regs *regs, unsigned int esr) { struct step_hook *hook; + struct list_head *list; int retval = DBG_HOOK_ERROR; + list = user_mode(regs) ? &user_step_hook : &kernel_step_hook; + rcu_read_lock(); - list_for_each_entry_rcu(hook, &step_hook, node) { + list_for_each_entry_rcu(hook, list, node) { retval = hook->fn(regs, esr); if (retval == DBG_HOOK_HANDLED) break; @@ -264,33 +288,44 @@ static int single_step_handler(unsigned long unused, unsigned int esr, } NOKPROBE_SYMBOL(single_step_handler); -static LIST_HEAD(break_hook); -static DEFINE_SPINLOCK(break_hook_lock); +static LIST_HEAD(user_break_hook); +static LIST_HEAD(kernel_break_hook); -void register_break_hook(struct break_hook *hook) +void register_user_break_hook(struct break_hook *hook) { - spin_lock(&break_hook_lock); - list_add_rcu(&hook->node, &break_hook); - spin_unlock(&break_hook_lock); + register_debug_hook(&hook->node, &user_break_hook); } -void unregister_break_hook(struct break_hook *hook) +void unregister_user_break_hook(struct break_hook *hook) { - spin_lock(&break_hook_lock); - list_del_rcu(&hook->node); - spin_unlock(&break_hook_lock); - synchronize_rcu(); + unregister_debug_hook(&hook->node); +} + +void register_kernel_break_hook(struct break_hook *hook) +{ + register_debug_hook(&hook->node, &kernel_break_hook); +} + +void unregister_kernel_break_hook(struct break_hook *hook) +{ + unregister_debug_hook(&hook->node); } static int call_break_hook(struct pt_regs *regs, unsigned int esr) { struct break_hook *hook; + struct list_head *list; int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + list = user_mode(regs) ? &user_break_hook : &kernel_break_hook; + rcu_read_lock(); - list_for_each_entry_rcu(hook, &break_hook, node) - if ((esr & hook->esr_mask) == hook->esr_val) + list_for_each_entry_rcu(hook, list, node) { + unsigned int comment = esr & BRK64_ESR_MASK; + + if ((comment & ~hook->mask) == hook->imm) fn = hook->fn; + } rcu_read_unlock(); return fn ? 
fn(regs, esr) : DBG_HOOK_ERROR; diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 691854b77c7f..4c01f299aeb2 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -275,15 +275,13 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) NOKPROBE_SYMBOL(kgdb_step_brk_fn); static struct break_hook kgdb_brkpt_hook = { - .esr_mask = 0xffffffff, - .esr_val = (u32)ESR_ELx_VAL_BRK64(KGDB_DYN_DBG_BRK_IMM), - .fn = kgdb_brk_fn + .fn = kgdb_brk_fn, + .imm = KGDB_DYN_DBG_BRK_IMM, }; static struct break_hook kgdb_compiled_brkpt_hook = { - .esr_mask = 0xffffffff, - .esr_val = (u32)ESR_ELx_VAL_BRK64(KGDB_COMPILED_DBG_BRK_IMM), - .fn = kgdb_compiled_brk_fn + .fn = kgdb_compiled_brk_fn, + .imm = KGDB_COMPILED_DBG_BRK_IMM, }; static struct step_hook kgdb_step_hook = { @@ -332,9 +330,9 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_break_hook(&kgdb_brkpt_hook); - register_break_hook(&kgdb_compiled_brkpt_hook); - register_step_hook(&kgdb_step_hook); + register_kernel_break_hook(&kgdb_brkpt_hook); + register_kernel_break_hook(&kgdb_compiled_brkpt_hook); + register_kernel_step_hook(&kgdb_step_hook); return 0; } @@ -345,9 +343,9 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_break_hook(&kgdb_brkpt_hook); - unregister_break_hook(&kgdb_compiled_brkpt_hook); - unregister_step_hook(&kgdb_step_hook); + unregister_kernel_break_hook(&kgdb_brkpt_hook); + unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook); + unregister_kernel_step_hook(&kgdb_step_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 636ca0119c0e..7d6ea88796a6 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -195,8 +195,7 @@ static int uprobe_single_step_handler(struct pt_regs *regs, /* uprobe breakpoint handler hook */ static struct break_hook uprobes_break_hook = { - .esr_mask = BRK64_ESR_MASK, - .esr_val = BRK64_ESR_UPROBES, + .imm = BRK64_ESR_UPROBES, .fn = uprobe_breakpoint_handler, }; @@ -207,8 +206,8 @@ static struct step_hook uprobes_step_hook = { static int __init arch_init_uprobes(void) { - register_break_hook(&uprobes_break_hook); - register_step_hook(&uprobes_step_hook); + register_user_break_hook(&uprobes_break_hook); + register_user_step_hook(&uprobes_step_hook); return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8ad119c3f665..85fdc3d7c556 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -969,9 +969,8 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr) } static struct break_hook bug_break_hook = { - .esr_val = 0xf2000000 | BUG_BRK_IMM, - .esr_mask = 0xffffffff, .fn = bug_handler, + .imm = BUG_BRK_IMM, }; #ifdef CONFIG_KASAN_SW_TAGS @@ -1016,13 +1015,10 @@ static int kasan_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } -#define KASAN_ESR_VAL (0xf2000000 | KASAN_BRK_IMM) -#define KASAN_ESR_MASK 0xffffff00 - static struct break_hook kasan_break_hook = { - .esr_val = KASAN_ESR_VAL, - .esr_mask = KASAN_ESR_MASK, - .fn = kasan_handler, + .fn = kasan_handler, + .imm = KASAN_BRK_IMM, + .mask = KASAN_BRK_MASK, }; #endif @@ -1034,7 +1030,9 @@ int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs) { #ifdef CONFIG_KASAN_SW_TAGS - if ((esr & KASAN_ESR_MASK) == KASAN_ESR_VAL) + unsigned int comment = esr & BRK64_ESR_MASK; + + if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return kasan_handler(regs, 
esr) != DBG_HOOK_HANDLED; #endif return bug_handler(regs, esr) != DBG_HOOK_HANDLED; @@ -1043,8 +1041,8 @@ int __init early_brk64(unsigned long addr, unsigned int esr, /* This registration must happen early, before debug_traps_init(). */ void __init trap_init(void) { - register_break_hook(&bug_break_hook); + register_kernel_break_hook(&bug_break_hook); #ifdef CONFIG_KASAN_SW_TAGS - register_break_hook(&kasan_break_hook); + register_kernel_break_hook(&kasan_break_hook); #endif } -- cgit v1.2.3-59-g8ed1b From a22d570aee77ae626e4d3532478ae8058a24bdb3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 14:35:00 +0000 Subject: arm64: kprobes: Avoid calling kprobes debug handlers explicitly Kprobes bypasses our debug hook registration code so that it doesn't get tangled up with recursive debug exceptions from things like lockdep: http://lists.infradead.org/pipermail/linux-arm-kernel/2015-February/324385.html However, since then, (a) the hook list has become RCU protected and (b) the kprobes hooks were found not to filter out exceptions from userspace correctly. On top of that, the step handler is invoked directly from single_step_handler(), which *does* use the debug hook list, so it's clearly not the end of the world. For now, have kprobes use the debug hook registration API like everybody else. We can revisit this in the future if this is found to limit coverage significantly. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/kprobes.h | 2 -- arch/arm64/kernel/debug-monitors.c | 10 ---------- arch/arm64/kernel/probes/kprobes.c | 16 ++++++++++++++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index d5a44cf859e9..21721fbf44e7 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -54,8 +54,6 @@ void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr); -int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); void kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 9b3fd7fa5b43..f4d8cda8830d 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -258,10 +258,6 @@ static int single_step_handler(unsigned long unused, unsigned int esr, if (!reinstall_suspended_bps(regs)) return 0; -#ifdef CONFIG_KPROBES - if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; -#endif if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED) handler_found = true; @@ -337,12 +333,6 @@ static int brk_handler(unsigned long unused, unsigned int esr, { bool handler_found = false; -#ifdef CONFIG_KPROBES - if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) { - if (kprobe_breakpoint_handler(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; - } -#endif if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED) handler_found = true; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 7a679caf4585..baf97f47aec0 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -439,7 +439,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) return 
DBG_HOOK_ERROR; } -int __kprobes +static int __kprobes kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); @@ -461,7 +461,11 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) return retval; } -int __kprobes +static struct step_hook kprobes_step_hook = { + .fn = kprobe_single_step_handler, +}; + +static int __kprobes kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { if (user_mode(regs)) @@ -471,6 +475,11 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } +static struct break_hook kprobes_break_hook = { + .imm = BRK64_ESR_KPROBES, + .fn = kprobe_breakpoint_handler, +}; + /* * Provide a blacklist of symbols identifying ranges which cannot be kprobed. * This blacklist is exposed to userspace via debugfs (kprobes/blacklist). @@ -599,5 +608,8 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { + register_kernel_break_hook(&kprobes_break_hook); + register_kernel_step_hook(&kprobes_step_hook); + return 0; } -- cgit v1.2.3-59-g8ed1b From fb610f2a2006322bebeb30408fefce6a01df09ea Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 15:37:09 +0000 Subject: arm64: debug: Remove redundant user_mode(regs) checks from debug handlers Now that the debug hook dispatching code takes the triggering exception level into account, there's no need for the hooks themselves to poke around with user_mode(regs). Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/kgdb.c | 8 +------- arch/arm64/kernel/probes/kprobes.c | 6 ------ arch/arm64/kernel/probes/uprobes.c | 12 ++++-------- arch/arm64/kernel/traps.c | 6 ------ 4 files changed, 5 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 4c01f299aeb2..30853d5b7859 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -244,9 +244,6 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - kgdb_handle_exception(1, SIGTRAP, 0, regs); return DBG_HOOK_HANDLED; } @@ -254,9 +251,6 @@ NOKPROBE_SYMBOL(kgdb_brk_fn) static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - compiled_break = 1; kgdb_handle_exception(1, SIGTRAP, 0, regs); @@ -266,7 +260,7 @@ NOKPROBE_SYMBOL(kgdb_compiled_brk_fn); static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs) || !kgdb_single_step) + if (!kgdb_single_step) return DBG_HOOK_ERROR; kgdb_handle_exception(1, SIGTRAP, 0, regs); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index baf97f47aec0..000f32d1a756 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -445,9 +445,6 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; - if (user_mode(regs)) - return DBG_HOOK_ERROR; - /* return error if this is not our step */ retval = kprobe_ss_hit(kcb, instruction_pointer(regs)); @@ -468,9 +465,6 @@ static struct step_hook kprobes_step_hook = { static int __kprobes kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - kprobe_handler(regs); return DBG_HOOK_HANDLED; } diff --git a/arch/arm64/kernel/probes/uprobes.c 
b/arch/arm64/kernel/probes/uprobes.c index 7d6ea88796a6..f37ab9567676 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -171,7 +171,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, static int uprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs) && uprobe_pre_sstep_notifier(regs)) + if (uprobe_pre_sstep_notifier(regs)) return DBG_HOOK_HANDLED; return DBG_HOOK_ERROR; @@ -182,13 +182,9 @@ static int uprobe_single_step_handler(struct pt_regs *regs, { struct uprobe_task *utask = current->utask; - if (user_mode(regs)) { - WARN_ON(utask && - (instruction_pointer(regs) != utask->xol_vaddr + 4)); - - if (uprobe_post_sstep_notifier(regs)) - return DBG_HOOK_HANDLED; - } + WARN_ON(utask && (instruction_pointer(regs) != utask->xol_vaddr + 4)); + if (uprobe_post_sstep_notifier(regs)) + return DBG_HOOK_HANDLED; return DBG_HOOK_ERROR; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 85fdc3d7c556..091379744d2f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -947,9 +947,6 @@ int is_valid_bugaddr(unsigned long addr) static int bug_handler(struct pt_regs *regs, unsigned int esr) { - if (user_mode(regs)) - return DBG_HOOK_ERROR; - switch (report_bug(regs->pc, regs)) { case BUG_TRAP_TYPE_BUG: die("Oops - BUG", regs, 0); @@ -988,9 +985,6 @@ static int kasan_handler(struct pt_regs *regs, unsigned int esr) u64 addr = regs->regs[0]; u64 pc = regs->pc; - if (user_mode(regs)) - return DBG_HOOK_ERROR; - kasan_report(addr, size, write, pc); /* -- cgit v1.2.3-59-g8ed1b From 453b7740ebfda2d84be7fb583c54f0c91c592869 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 15:06:42 +0000 Subject: arm64: probes: Move magic BRK values into brk-imm.h kprobes and uprobes reserve some BRK immediates for installing their probes. Define these along with the other reservations in brk-imm.h and rename the ESR definitions to be consistent with the others that we already have. 
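Since the *_BRK_IMM values are now the canonical form, it may help to spell out how an immediate relates to the ESR (sketch only, mirroring the call_break_hook() logic introduced earlier in this series): the immediate lands in the low 16 bits of the ISS, the "comment" field, and hooks are matched by masking that field.

	/* Sketch: does this BRK's comment field select the given hook? */
	static bool brk_matches_hook(unsigned int esr, const struct break_hook *hook)
	{
		unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;

		return (comment & ~hook->mask) == hook->imm;
	}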
Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/brk-imm.h | 4 ++++ arch/arm64/include/asm/debug-monitors.h | 7 ++----- arch/arm64/include/asm/esr.h | 4 +--- arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/probes/kprobes.c | 2 +- arch/arm64/kernel/probes/uprobes.c | 2 +- arch/arm64/kernel/traps.c | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index fec9e1384641..d84294064e6a 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -11,6 +11,8 @@ /* * #imm16 values used for BRK instruction generation + * 0x004: for installing kprobes + * 0x005: for installing uprobes * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -18,6 +20,8 @@ * 0x800: kernel-mode BUG() and WARN() traps * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) */ +#define KPROBES_BRK_IMM 0x004 +#define UPROBES_BRK_IMM 0x005 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7d37cfa5cc16..0679f781696d 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -65,12 +65,9 @@ #define CACHE_FLUSH_IS_SAFE 1 /* kprobes BRK opcodes with ESR encoding */ -#define BRK64_ESR_MASK 0xFFFF -#define BRK64_ESR_KPROBES 0x0004 -#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5)) +#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ -#define BRK64_ESR_UPROBES 0x0005 -#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (BRK64_ESR_UPROBES << 5)) +#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) /* AArch32 */ #define DBG_ESR_EVT_BKPT 0x4 diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 52233f00d53d..3541720189c9 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -156,9 +156,7 @@ ESR_ELx_WFx_ISS_WFI) /* BRK instruction trap from AArch64 state */ -#define ESR_ELx_VAL_BRK64(imm) \ - ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ - ((imm) & 0xffff)) +#define ESR_ELx_BRK64_ISS_COMMENT_MASK 0xffff /* ISS field definitions for System instruction traps */ #define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index f4d8cda8830d..2692a0a27cf3 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -317,7 +317,7 @@ static int call_break_hook(struct pt_regs *regs, unsigned int esr) rcu_read_lock(); list_for_each_entry_rcu(hook, list, node) { - unsigned int comment = esr & BRK64_ESR_MASK; + unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; if ((comment & ~hook->mask) == hook->imm) fn = hook->fn; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 000f32d1a756..2509fcb6d404 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -470,7 +470,7 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) } static struct break_hook kprobes_break_hook = { - .imm = BRK64_ESR_KPROBES, + .imm = KPROBES_BRK_IMM, .fn = kprobe_breakpoint_handler, }; diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 
f37ab9567676..605945eac1f8 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -191,7 +191,7 @@ static int uprobe_single_step_handler(struct pt_regs *regs, /* uprobe breakpoint handler hook */ static struct break_hook uprobes_break_hook = { - .imm = BRK64_ESR_UPROBES, + .imm = UPROBES_BRK_IMM, .fn = uprobe_breakpoint_handler, }; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 091379744d2f..74598396e0bf 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -1024,7 +1024,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs) { #ifdef CONFIG_KASAN_SW_TAGS - unsigned int comment = esr & BRK64_ESR_MASK; + unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM) return kasan_handler(regs, esr) != DBG_HOOK_HANDLED; -- cgit v1.2.3-59-g8ed1b From ab6211c90052435126ad1319e9223b68e154b9f0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 26 Feb 2019 15:39:47 +0000 Subject: arm64: debug: Clean up brk_handler() brk_handler() now looks pretty strange and can be refactored to drop its funny 'handler_found' local variable altogether. Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 2692a0a27cf3..800486cc4823 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -331,14 +331,12 @@ NOKPROBE_SYMBOL(call_break_hook); static int brk_handler(unsigned long unused, unsigned int esr, struct pt_regs *regs) { - bool handler_found = false; - - if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - handler_found = true; + if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; - if (!handler_found && user_mode(regs)) { + if (user_mode(regs)) { send_user_sigtrap(TRAP_BRKPT); - } else if (!handler_found) { + } else { pr_warn("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; } -- cgit v1.2.3-59-g8ed1b From 90292aca9854a2cbd904127337d6fea30f46290c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 11 Mar 2019 18:57:46 -0600 Subject: arm64: mm: use appropriate ctors for page tables For pte page, use pgtable_page_ctor(); for pmd page, use pgtable_pmd_page_ctor(); and for the rest (pud, p4d and pgd), don't use any. For now, we don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK and pgtable_pmd_page_ctor() is a nop. When we do in patch 3, we make sure pmd is not folded so we won't mistakenly call pgtable_pmd_page_ctor() on pud or p4d. 
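The resulting policy, which the new 'shift' argument to the allocator encodes, can be summarised by the sketch below (illustrative only: the helper name is invented, 'page' is the freshly allocated table page, and the ctor return values are ignored here although the real code BUG()s on failure):

	/* Sketch: which constructor applies to a table page at each level. */
	static void example_ctor_for_level(struct page *page, int shift)
	{
		if (shift == PAGE_SHIFT)		/* pte table */
			pgtable_page_ctor(page);
		else if (shift == PMD_SHIFT)		/* pmd table */
			pgtable_pmd_page_ctor(page);	/* nop until split PMD ptlocks */
		/* pud, p4d and pgd tables get no constructor at all */
	}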
Acked-by: Mark Rutland Signed-off-by: Yu Zhao Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index e97f018ff740..66460c2074f6 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -97,7 +97,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } EXPORT_SYMBOL(phys_mem_access_prot); -static phys_addr_t __init early_pgtable_alloc(void) +static phys_addr_t __init early_pgtable_alloc(int shift) { phys_addr_t phys; void *ptr; @@ -174,7 +174,7 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; @@ -184,7 +184,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, if (pmd_none(pmd)) { phys_addr_t pte_phys; BUG_ON(!pgtable_alloc); - pte_phys = pgtable_alloc(); + pte_phys = pgtable_alloc(PAGE_SHIFT); __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE); pmd = READ_ONCE(*pmdp); } @@ -208,7 +208,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr, static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), int flags) + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; pmd_t *pmdp; @@ -246,7 +246,7 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end, static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), int flags) + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; pud_t pud = READ_ONCE(*pudp); @@ -258,7 +258,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, if (pud_none(pud)) { phys_addr_t pmd_phys; BUG_ON(!pgtable_alloc); - pmd_phys = pgtable_alloc(); + pmd_phys = pgtable_alloc(PMD_SHIFT); __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); pud = READ_ONCE(*pudp); } @@ -294,7 +294,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next, static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long next; @@ -304,7 +304,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, if (pgd_none(pgd)) { phys_addr_t pud_phys; BUG_ON(!pgtable_alloc); - pud_phys = pgtable_alloc(); + pud_phys = pgtable_alloc(PUD_SHIFT); __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE); pgd = READ_ONCE(*pgdp); } @@ -345,7 +345,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, - phys_addr_t (*pgtable_alloc)(void), + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long addr, length, end, next; @@ -371,11 +371,23 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } -static phys_addr_t pgd_pgtable_alloc(void) +static phys_addr_t pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); - if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) - BUG(); + BUG_ON(!ptr); + + /* + * Call proper page table ctor 
in case later we need to + * call core mm functions like apply_to_page_range() on + * this pre-allocated page table. + * + * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if pmd is + * folded, and if so pgtable_pmd_page_ctor() becomes nop. + */ + if (shift == PAGE_SHIFT) + BUG_ON(!pgtable_page_ctor(virt_to_page(ptr))); + else if (shift == PMD_SHIFT) + BUG_ON(!pgtable_pmd_page_ctor(virt_to_page(ptr))); /* Ensure the zeroed page is visible to the page table walker */ dsb(ishst); -- cgit v1.2.3-59-g8ed1b From 369aaab845a65b90ceac2801edd577fae442476c Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 11 Mar 2019 18:57:47 -0600 Subject: arm64: mm: don't call page table ctors for init_mm init_mm doesn't require page table lock to be initialized at any level. Add a separate page table allocator for it, and the new one skips page table ctors. The ctors allocate memory when ALLOC_SPLIT_PTLOCKS is set. Not calling them avoids memory leak in case we call pte_free_kernel() on init_mm. Acked-by: Mark Rutland Signed-off-by: Yu Zhao Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 66460c2074f6..83a221d2d24c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -371,6 +371,16 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } +static phys_addr_t pgd_kernel_pgtable_alloc(int shift) +{ + void *ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + + /* Ensure the zeroed page is visible to the page table walker */ + dsb(ishst); + return __pa(ptr); +} + static phys_addr_t pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); @@ -595,7 +605,7 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, pgd_pgtable_alloc, 0); + prot, pgd_kernel_pgtable_alloc, 0); /* Map both the text and data into the kernel page table */ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); @@ -1067,7 +1077,8 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, pgd_pgtable_alloc, flags); + size, PAGE_KERNEL, pgd_kernel_pgtable_alloc, + flags); return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap, want_memblock); -- cgit v1.2.3-59-g8ed1b From 14b94d07572619af896c6d2d83b1196c4041fe19 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 12 Mar 2019 18:55:45 +0530 Subject: KVM: ARM: Remove pgtable page standard functions from stage-2 page tables ARM64 standard pgtable functions are going to use pgtable_page_[ctor|dtor] or pgtable_pmd_page_[ctor|dtor] constructs. At present KVM guest stage-2 PUD|PMD|PTE level page tabe pages are allocated with __get_free_page() via mmu_memory_cache_alloc() but released with standard pud|pmd_free() or pte_free_kernel(). These will fail once they start calling into pgtable_ [pmd]_page_dtor() for pages which never originally went through respective constructor functions. Hence convert all stage-2 page table page release functions to call buddy directly while freeing pages. 
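The rule being enforced is a simple pairing (the sketch below is illustrative only: the helper name is invented, the GFP flags are placeholders, and the real stage-2 code allocates through mmu_memory_cache_alloc()): a table page taken straight from the buddy allocator, with no constructor run on it, must be returned straight to the buddy allocator.

	static void example_stage2_table_page(void)
	{
		unsigned long page = __get_free_page(GFP_KERNEL | __GFP_ZERO);

		if (!page)
			return;

		/* No pgtable ctor was run on this page, so bypass
		 * pte_free_kernel()/pmd_free()/pud_free(), which will soon
		 * run the matching dtors, and free the page directly. */
		free_page(page);
	}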
Reviewed-by: Suzuki K Poulose Acked-by: Yu Zhao Acked-by: Marc Zyngier Signed-off-by: Anshuman Khandual Signed-off-by: Will Deacon --- arch/arm/include/asm/stage2_pgtable.h | 4 ++-- arch/arm64/include/asm/stage2_pgtable.h | 4 ++-- virt/kvm/arm/mmu.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h index 9e11dce55e06..9587517649bd 100644 --- a/arch/arm/include/asm/stage2_pgtable.h +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -32,14 +32,14 @@ #define stage2_pgd_present(kvm, pgd) pgd_present(pgd) #define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud) #define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address) -#define stage2_pud_free(kvm, pud) pud_free(NULL, pud) +#define stage2_pud_free(kvm, pud) do { } while (0) #define stage2_pud_none(kvm, pud) pud_none(pud) #define stage2_pud_clear(kvm, pud) pud_clear(pud) #define stage2_pud_present(kvm, pud) pud_present(pud) #define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd) #define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address) -#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd) +#define stage2_pmd_free(kvm, pmd) free_page((unsigned long)pmd) #define stage2_pud_huge(kvm, pud) pud_huge(pud) diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 5412fa40825e..915809e4ac32 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -119,7 +119,7 @@ static inline pud_t *stage2_pud_offset(struct kvm *kvm, static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud) { if (kvm_stage2_has_pud(kvm)) - pud_free(NULL, pud); + free_page((unsigned long)pud); } static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp) @@ -192,7 +192,7 @@ static inline pmd_t *stage2_pmd_offset(struct kvm *kvm, static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd) { if (kvm_stage2_has_pmd(kvm)) - pmd_free(NULL, pmd); + free_page((unsigned long)pmd); } static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 27c958306449..ad90ea3e5558 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -189,7 +189,7 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr VM_BUG_ON(pmd_thp_or_huge(*pmd)); pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); - pte_free_kernel(NULL, pte_table); + free_page((unsigned long)pte_table); put_page(virt_to_page(pmd)); } -- cgit v1.2.3-59-g8ed1b From 54c8d9119ec85de48fd0707946a5e57df9ad7acf Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Mon, 11 Mar 2019 18:57:49 -0600 Subject: arm64: mm: enable per pmd page table lock Switch from per mm_struct to per pmd page table lock by enabling ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for large system. I'm not sure if there is contention on mm->page_table_lock. Given the option comes at no cost (apart from initializing more spin locks), why not enable it now. We only do so when pmd is not folded, so we don't mistakenly call pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc(). 
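The benefit shows up in core mm paths that serialise PMD-level updates (this is an illustration of an existing core-mm pattern, not code added by this patch, and the function name is invented): with the option enabled, pmd_lock() hands back a lock private to that pmd table page instead of mm->page_table_lock.

	/* Sketch: how core mm takes the PMD-level lock. */
	static void example_pmd_update(struct mm_struct *mm, pmd_t *pmd)
	{
		spinlock_t *ptl = pmd_lock(mm, pmd);	/* per pmd page with this option */

		/* ... modify the pmd entry under the finer-grained lock ... */

		spin_unlock(ptl);
	}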
Signed-off-by: Yu Zhao Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 +++ arch/arm64/include/asm/pgalloc.h | 12 +++++++++++- arch/arm64/include/asm/tlb.h | 5 ++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de..555af5035592 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -889,6 +889,9 @@ config ARCH_WANT_HUGE_PMD_SHARE config ARCH_HAS_CACHE_LINE_SIZE def_bool y +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y if PGTABLE_LEVELS > 2 + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" ---help--- diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 52fa47c73bf0..dabba4b2c61f 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -33,12 +33,22 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)__get_free_page(PGALLOC_GFP); + struct page *page; + + page = alloc_page(PGALLOC_GFP); + if (!page) + return NULL; + if (!pgtable_pmd_page_ctor(page)) { + __free_page(page); + return NULL; + } + return page_address(page); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp) { BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1)); + pgtable_pmd_page_dtor(virt_to_page(pmdp)); free_page((unsigned long)pmdp); } diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 106fdc951b6e..4e3becfed387 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -62,7 +62,10 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { - tlb_remove_table(tlb, virt_to_page(pmdp)); + struct page *page = virt_to_page(pmdp); + + pgtable_pmd_page_dtor(page); + tlb_remove_table(tlb, page); } #endif -- cgit v1.2.3-59-g8ed1b From 475ba3fc194b6429eebcca0e1cf9917de6fa173e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 11:23:48 +0100 Subject: arm64: mm: Consolidate early page table allocation The logic for early allocation of page tables is duplicated between pgd_kernel_pgtable_alloc() and pgd_pgtable_alloc(). Drop the duplication by calling one from the other and renaming pgd_kernel_pgtable_alloc() accordingly. Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 83a221d2d24c..ef82312860ac 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -371,7 +371,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, } while (pgdp++, addr = next, addr != end); } -static phys_addr_t pgd_kernel_pgtable_alloc(int shift) +static phys_addr_t __pgd_pgtable_alloc(int shift) { void *ptr = (void *)__get_free_page(PGALLOC_GFP); BUG_ON(!ptr); @@ -383,8 +383,7 @@ static phys_addr_t pgd_kernel_pgtable_alloc(int shift) static phys_addr_t pgd_pgtable_alloc(int shift) { - void *ptr = (void *)__get_free_page(PGALLOC_GFP); - BUG_ON(!ptr); + phys_addr_t pa = __pgd_pgtable_alloc(shift); /* * Call proper page table ctor in case later we need to @@ -395,13 +394,11 @@ static phys_addr_t pgd_pgtable_alloc(int shift) * folded, and if so pgtable_pmd_page_ctor() becomes nop. 
*/ if (shift == PAGE_SHIFT) - BUG_ON(!pgtable_page_ctor(virt_to_page(ptr))); + BUG_ON(!pgtable_page_ctor(phys_to_page(pa))); else if (shift == PMD_SHIFT) - BUG_ON(!pgtable_pmd_page_ctor(virt_to_page(ptr))); + BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa))); - /* Ensure the zeroed page is visible to the page table walker */ - dsb(ishst); - return __pa(ptr); + return pa; } /* @@ -605,7 +602,7 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, pgd_kernel_pgtable_alloc, 0); + prot, __pgd_pgtable_alloc, 0); /* Map both the text and data into the kernel page table */ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); @@ -1077,8 +1074,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), - size, PAGE_KERNEL, pgd_kernel_pgtable_alloc, - flags); + size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap, want_memblock); -- cgit v1.2.3-59-g8ed1b From 6fda41bf12615ee7c3ddac88155099b1a8cf8d00 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 8 Apr 2019 18:17:18 +0100 Subject: arm64: Clear OSDLR_EL1 on CPU boot Some firmwares may reboot CPUs with OS Double Lock set. Make sure that it is unlocked, in order to use debug exceptions. Cc: Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 800486cc4823..555b6bd2f3d6 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -135,6 +135,7 @@ NOKPROBE_SYMBOL(disable_debug_monitors); */ static int clear_os_lock(unsigned int cpu) { + write_sysreg(0, osdlr_el1); write_sysreg(0, oslar_el1); isb(); return 0; -- cgit v1.2.3-59-g8ed1b From 827a108e354db633698f0b4a10c1ffd2b1f8d1d0 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 8 Apr 2019 18:17:19 +0100 Subject: arm64: Save and restore OSDLR_EL1 across suspend/resume When the CPU comes out of suspend, the firmware may have modified the OS Double Lock Register. Save it in an unused slot of cpu_suspend_ctx, and restore it on resume. 
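In C terms the change amounts to the following (illustration only; the real code lives in the cpu_do_suspend/cpu_do_resume assembly below, and 'saved_osdlr' stands in for the slot in cpu_suspend_ctx):

	u64 saved_osdlr;

	/* cpu_do_suspend equivalent: capture the OS Double Lock state ... */
	saved_osdlr = read_sysreg(osdlr_el1);

	/* cpu_do_resume equivalent: ... and write it back before touching
	 * OSLAR_EL1, so a firmware-modified double lock does not survive
	 * resume and silently disable debug exceptions. */
	write_sysreg(saved_osdlr, osdlr_el1);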
Cc: Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- arch/arm64/mm/proc.S | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index aa0817c9c4c3..fdd626d34274 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -65,24 +65,25 @@ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 - mrs x5, cpacr_el1 - mrs x6, tcr_el1 - mrs x7, vbar_el1 - mrs x8, mdscr_el1 - mrs x9, oslsr_el1 - mrs x10, sctlr_el1 + mrs x5, osdlr_el1 + mrs x6, cpacr_el1 + mrs x7, tcr_el1 + mrs x8, vbar_el1 + mrs x9, mdscr_el1 + mrs x10, oslsr_el1 + mrs x11, sctlr_el1 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x11, tpidr_el1 + mrs x12, tpidr_el1 alternative_else - mrs x11, tpidr_el2 + mrs x12, tpidr_el2 alternative_endif - mrs x12, sp_el0 + mrs x13, sp_el0 stp x2, x3, [x0] - stp x4, xzr, [x0, #16] - stp x5, x6, [x0, #32] - stp x7, x8, [x0, #48] - stp x9, x10, [x0, #64] - stp x11, x12, [x0, #80] + stp x4, x5, [x0, #16] + stp x6, x7, [x0, #32] + stp x8, x9, [x0, #48] + stp x10, x11, [x0, #64] + stp x12, x13, [x0, #80] ret ENDPROC(cpu_do_suspend) @@ -105,8 +106,8 @@ ENTRY(cpu_do_resume) msr cpacr_el1, x6 /* Don't change t0sz here, mask those bits when restoring */ - mrs x5, tcr_el1 - bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + mrs x7, tcr_el1 + bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH msr tcr_el1, x8 msr vbar_el1, x9 @@ -130,6 +131,7 @@ alternative_endif /* * Restore oslsr_el1 by writing oslar_el1 */ + msr osdlr_el1, x5 ubfx x11, x11, #1, #1 msr oslar_el1, x11 reset_pmuserenr_el0 x0 // Disable PMU access from EL0 -- cgit v1.2.3-59-g8ed1b From d263119387de9975d2acba1dfd3392f7c5979c18 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 1 Apr 2019 12:30:14 +0100 Subject: arm64: compat: Reduce address limit Currently, compat tasks running on arm64 can allocate memory up to TASK_SIZE_32 (UL(0x100000000)). This means that mmap() allocations, if we treat them as returning an array, are not compliant with the sections 6.5.8 of the C standard (C99) which states that: "If the expression P points to an element of an array object and the expression Q points to the last element of the same array object, the pointer expression Q+1 compares greater than P". Redefine TASK_SIZE_32 to address the issue. Cc: Catalin Marinas Cc: Will Deacon Cc: Jann Horn Cc: Reported-by: Jann Horn Signed-off-by: Vincenzo Frascino [will: fixed typo in comment] Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5d9ce62bdebd..228af56ece48 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -57,7 +57,15 @@ #define TASK_SIZE_64 (UL(1) << vabits_user) #ifdef CONFIG_COMPAT +#ifdef CONFIG_ARM64_64K_PAGES +/* + * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied + * by the compat vectors page. + */ #define TASK_SIZE_32 UL(0x100000000) +#else +#define TASK_SIZE_32 (UL(0x100000000) - PAGE_SIZE) +#endif /* CONFIG_ARM64_64K_PAGES */ #define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \ TASK_SIZE_32 : TASK_SIZE_64) #define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? 
\ -- cgit v1.2.3-59-g8ed1b From 3d659e7d6513890eb137e7056635004b8b885ce5 Mon Sep 17 00:00:00 2001 From: Raphael Gault Date: Thu, 11 Apr 2019 17:16:46 +0100 Subject: arm64: perf_event: Remove wrongfully used inline The functions armv8pmu_read_counter() and armv8pmu_write_counter() are `static inline` while they are only referenced when assigned to a function pointer field in a `struct arm_pmu` instance. The inline keyword is thus counter intuitive and shouldn't be used. Acked-by: Mark Rutland Signed-off-by: Raphael Gault Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4addb38bc250..6164d389eed6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -431,7 +431,7 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event) return val; } -static inline u64 armv8pmu_read_counter(struct perf_event *event) +static u64 armv8pmu_read_counter(struct perf_event *event) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; @@ -468,7 +468,7 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event, } } -static inline void armv8pmu_write_counter(struct perf_event *event, u64 value) +static void armv8pmu_write_counter(struct perf_event *event, u64 value) { struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; -- cgit v1.2.3-59-g8ed1b From 691efbedc60d2a7364a90e38882fc762f06f52c4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 11 Apr 2019 18:30:15 +0900 Subject: arm64: vdso: use $(LD) instead of $(CC) to link VDSO We use $(LD) to link vmlinux, modules, decompressors, etc. VDSO is the only exceptional case where $(CC) is used as the linker driver, but I do not know why we need to do so. VDSO uses a special linker script, and does not link standard libraries at all. I changed the Makefile to use $(LD) rather than $(CC). I tested this, and VDSO worked for me. Users will be able to use their favorite linker (e.g. lld instead of of bfd) by passing LD= from the command line. My plan is to rewrite all VDSO Makefiles to use $(LD), then delete cc-ldoption. Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index a0af6bf6c11b..744b9dbaba03 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -12,17 +12,12 @@ obj-vdso := gettimeofday.o note.o sigreturn.o targets := $(obj-vdso) vdso.so vdso.so.dbg obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 \ + $(call ld-option, --hash-style=sysv) -n -T # Disable gcov profiling for VDSO code GCOV_PROFILE := n -# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared -# down to collect2, resulting in silent corruption of the vDSO image. 
-ccflags-y += -Wl,-shared - obj-y += vdso.o extra-y += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) @@ -32,7 +27,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE - $(call if_changed,vdsold) + $(call if_changed,ld) # Strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -52,8 +47,6 @@ $(obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $(real-prereqs) -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< -- cgit v1.2.3-59-g8ed1b From a823c35ff2eda73046cc1847326071de350fceda Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 12 Apr 2019 23:22:01 +0900 Subject: arm64: ptrace: Add function argument access API Add regs_get_argument() which returns N th argument of the function call. On arm64, it supports up to 8th argument. Note that this chooses most probably assignment, in some case it can be incorrect (e.g. passing data structure or floating point etc.) This enables ftrace kprobe events to access kernel function arguments via $argN syntax. Signed-off-by: Masami Hiramatsu [will: tidied up the comment a bit] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/ptrace.h | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 555af5035592..c383625ec02c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -148,6 +148,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_RCU_TABLE_INVALIDATE select HAVE_RSEQ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index ec60174c8c18..b2de32939ada 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -305,6 +305,28 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->regs[0]; } +/** + * regs_get_kernel_argument() - get Nth function argument in kernel + * @regs: pt_regs of that context + * @n: function argument number (start from 0) + * + * regs_get_argument() returns @n th argument of the function call. + * + * Note that this chooses the most likely register mapping. In very rare + * cases this may not return correct data, for example, if one of the + * function parameters is 16 bytes or bigger. In such cases, we cannot + * get access the parameter correctly and the register assignment of + * subsequent parameters will be shifted. + */ +static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, + unsigned int n) +{ +#define NR_REG_ARGUMENTS 8 + if (n < NR_REG_ARGUMENTS) + return pt_regs_read_reg(regs, n); + return 0; +} + /* We must avoid circular header include via sched.h */ struct task_struct; int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); -- cgit v1.2.3-59-g8ed1b From aaba098fe6ce594ae6f963dc041be6307e499f19 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:40 +0100 Subject: arm64: HWCAP: add support for AT_HWCAP2 As we will exhaust the first 32 bits of AT_HWCAP let's start exposing AT_HWCAP2 to userspace to give us up to 64 caps. Whilst it's possible to use the remaining 32 bits of AT_HWCAP, we prefer to expand into AT_HWCAP2 in order to provide a consistent view to userspace between ILP32 and LP64. 
However internal to the kernel we prefer to continue to use the full space of elf_hwcap. To reduce complexity and allow for future expansion, we now represent hwcaps in the kernel as ordinals and use a KERNEL_HWCAP_ prefix. This allows us to support automatic feature based module loading for all our hwcaps. We introduce cpu_set_feature to set hwcaps which complements the existing cpu_have_feature helper. These helpers allow us to clean up existing direct uses of elf_hwcap and reduce any future effort required to move beyond 64 caps. For convenience we also introduce cpu_{have,set}_named_feature which makes use of the cpu_feature macro to allow providing a hwcap name without a {KERNEL_}HWCAP_ prefix. Signed-off-by: Andrew Murray [will: use const_ilog2() and tweak documentation] Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.txt | 13 +++++-- arch/arm64/crypto/aes-ce-ccm-glue.c | 2 +- arch/arm64/crypto/aes-neonbs-glue.c | 2 +- arch/arm64/crypto/chacha-neon-glue.c | 2 +- arch/arm64/crypto/crct10dif-ce-glue.c | 4 +- arch/arm64/crypto/ghash-ce-glue.c | 8 ++-- arch/arm64/crypto/nhpoly1305-neon-glue.c | 2 +- arch/arm64/crypto/sha256-glue.c | 4 +- arch/arm64/include/asm/cpufeature.h | 22 ++++++----- arch/arm64/include/asm/hwcap.h | 52 ++++++++++++++++++++++++- arch/arm64/include/uapi/asm/hwcap.h | 2 +- arch/arm64/kernel/cpufeature.c | 66 ++++++++++++++++---------------- arch/arm64/kernel/cpuinfo.c | 2 +- arch/arm64/kernel/fpsimd.c | 4 +- drivers/clocksource/arm_arch_timer.c | 8 ++++ 15 files changed, 130 insertions(+), 63 deletions(-) diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 13d6691b37be..11805777031e 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -13,9 +13,9 @@ architected discovery mechanism available to userspace code at EL0. The kernel exposes the presence of these features to userspace through a set of flags called hwcaps, exposed in the auxilliary vector. -Userspace software can test for features by acquiring the AT_HWCAP entry -of the auxilliary vector, and testing whether the relevant flags are -set, e.g. +Userspace software can test for features by acquiring the AT_HWCAP or +AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant +flags are set, e.g. bool floating_point_is_present(void) { @@ -194,3 +194,10 @@ HWCAP_PACG Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or ID_AA64ISAR1_EL1.GPI == 0b0001, as described by Documentation/arm64/pointer-authentication.txt. + + +4. Unused AT_HWCAP bits +----------------------- + +For interoperation with userspace, the kernel guarantees that bits 62 +and 63 of AT_HWCAP will always be returned as 0. 
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 5fc6f51908fd..036ea77f83bc 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -372,7 +372,7 @@ static struct aead_alg ccm_aes_alg = { static int __init aes_mod_init(void) { - if (!(elf_hwcap & HWCAP_AES)) + if (!cpu_have_named_feature(AES)) return -ENODEV; return crypto_register_aead(&ccm_aes_alg); } diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index e7a95a566462..bf1b321ff4c1 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -440,7 +440,7 @@ static int __init aes_init(void) int err; int i; - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index bece1d85bd81..cb054f51c917 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -173,7 +173,7 @@ static struct skcipher_alg algs[] = { static int __init chacha_simd_mod_init(void) { - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c index dd325829ee44..e81d5bd555c0 100644 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -101,7 +101,7 @@ static struct shash_alg crc_t10dif_alg[] = {{ static int __init crc_t10dif_mod_init(void) { - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) return crypto_register_shashes(crc_t10dif_alg, ARRAY_SIZE(crc_t10dif_alg)); else @@ -111,7 +111,7 @@ static int __init crc_t10dif_mod_init(void) static void __exit crc_t10dif_mod_exit(void) { - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) crypto_unregister_shashes(crc_t10dif_alg, ARRAY_SIZE(crc_t10dif_alg)); else diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 791ad422c427..4e69bb78ea89 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -704,10 +704,10 @@ static int __init ghash_ce_mod_init(void) { int ret; - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) ret = crypto_register_shashes(ghash_alg, ARRAY_SIZE(ghash_alg)); else @@ -717,7 +717,7 @@ static int __init ghash_ce_mod_init(void) if (ret) return ret; - if (elf_hwcap & HWCAP_PMULL) { + if (cpu_have_named_feature(PMULL)) { ret = crypto_register_aead(&gcm_aes_alg); if (ret) crypto_unregister_shashes(ghash_alg, @@ -728,7 +728,7 @@ static int __init ghash_ce_mod_init(void) static void __exit ghash_ce_mod_exit(void) { - if (elf_hwcap & HWCAP_PMULL) + if (cpu_have_named_feature(PMULL)) crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg)); else crypto_unregister_shash(ghash_alg); diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index 22cc32ac9448..38a589044b6c 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -56,7 +56,7 @@ static struct shash_alg nhpoly1305_alg = { static int __init nhpoly1305_mod_init(void) { - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) return -ENODEV; return 
crypto_register_shash(&nhpoly1305_alg); diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 4aedeaefd61f..0cccdb9cc2c0 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -173,7 +173,7 @@ static int __init sha256_mod_init(void) if (ret) return ret; - if (elf_hwcap & HWCAP_ASIMD) { + if (cpu_have_named_feature(ASIMD)) { ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs)); if (ret) crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); @@ -183,7 +183,7 @@ static int __init sha256_mod_init(void) static void __exit sha256_mod_fini(void) { - if (elf_hwcap & HWCAP_ASIMD) + if (cpu_have_named_feature(ASIMD)) crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs)); crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e505e1fbd2b9..347c17046668 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -14,15 +14,8 @@ #include #include -/* - * In the arm64 world (as in the ARM world), elf_hwcap is used both internally - * in the kernel and for user space to keep track of which optional features - * are supported by the current system. So let's map feature 'x' to HWCAP_x. - * Note that HWCAP_x constants are bit fields so we need to take the log. - */ - -#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) -#define cpu_feature(x) ilog2(HWCAP_ ## x) +#define MAX_CPU_FEATURES 64 +#define cpu_feature(x) KERNEL_HWCAP_ ## x #ifndef __ASSEMBLY__ @@ -400,10 +393,19 @@ extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); bool this_cpu_has_cap(unsigned int cap); +static inline void cpu_set_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + elf_hwcap |= BIT(num); +} +#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) + static inline bool cpu_have_feature(unsigned int num) { - return elf_hwcap & (1UL << num); + WARN_ON(num >= MAX_CPU_FEATURES); + return elf_hwcap & BIT(num); } +#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) /* System capability check for constant caps */ static inline bool __cpus_have_const_cap(int num) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 400b80b49595..af868cbe96f8 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -40,11 +40,61 @@ #define COMPAT_HWCAP2_CRC32 (1 << 4) #ifndef __ASSEMBLY__ +#include +#include + +/* + * For userspace we represent hwcaps as a collection of HWCAP{,2}_x bitfields + * as described in uapi/asm/hwcap.h. For the kernel we represent hwcaps as + * natural numbers (in a single range of size MAX_CPU_FEATURES) defined here + * with prefix KERNEL_HWCAP_ mapped to their HWCAP{,2}_x counterpart. + * + * Hwcaps should be set and tested within the kernel via the + * cpu_{set,have}_named_feature(feature) where feature is the unique suffix + * of KERNEL_HWCAP_{feature}. 
+ */ +#define __khwcap_feature(x) const_ilog2(HWCAP_ ## x) +#define KERNEL_HWCAP_FP __khwcap_feature(FP) +#define KERNEL_HWCAP_ASIMD __khwcap_feature(ASIMD) +#define KERNEL_HWCAP_EVTSTRM __khwcap_feature(EVTSTRM) +#define KERNEL_HWCAP_AES __khwcap_feature(AES) +#define KERNEL_HWCAP_PMULL __khwcap_feature(PMULL) +#define KERNEL_HWCAP_SHA1 __khwcap_feature(SHA1) +#define KERNEL_HWCAP_SHA2 __khwcap_feature(SHA2) +#define KERNEL_HWCAP_CRC32 __khwcap_feature(CRC32) +#define KERNEL_HWCAP_ATOMICS __khwcap_feature(ATOMICS) +#define KERNEL_HWCAP_FPHP __khwcap_feature(FPHP) +#define KERNEL_HWCAP_ASIMDHP __khwcap_feature(ASIMDHP) +#define KERNEL_HWCAP_CPUID __khwcap_feature(CPUID) +#define KERNEL_HWCAP_ASIMDRDM __khwcap_feature(ASIMDRDM) +#define KERNEL_HWCAP_JSCVT __khwcap_feature(JSCVT) +#define KERNEL_HWCAP_FCMA __khwcap_feature(FCMA) +#define KERNEL_HWCAP_LRCPC __khwcap_feature(LRCPC) +#define KERNEL_HWCAP_DCPOP __khwcap_feature(DCPOP) +#define KERNEL_HWCAP_SHA3 __khwcap_feature(SHA3) +#define KERNEL_HWCAP_SM3 __khwcap_feature(SM3) +#define KERNEL_HWCAP_SM4 __khwcap_feature(SM4) +#define KERNEL_HWCAP_ASIMDDP __khwcap_feature(ASIMDDP) +#define KERNEL_HWCAP_SHA512 __khwcap_feature(SHA512) +#define KERNEL_HWCAP_SVE __khwcap_feature(SVE) +#define KERNEL_HWCAP_ASIMDFHM __khwcap_feature(ASIMDFHM) +#define KERNEL_HWCAP_DIT __khwcap_feature(DIT) +#define KERNEL_HWCAP_USCAT __khwcap_feature(USCAT) +#define KERNEL_HWCAP_ILRCPC __khwcap_feature(ILRCPC) +#define KERNEL_HWCAP_FLAGM __khwcap_feature(FLAGM) +#define KERNEL_HWCAP_SSBS __khwcap_feature(SSBS) +#define KERNEL_HWCAP_SB __khwcap_feature(SB) +#define KERNEL_HWCAP_PACA __khwcap_feature(PACA) +#define KERNEL_HWCAP_PACG __khwcap_feature(PACG) + +#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) + /* * This yields a mask that user programs can use to figure out what * instruction set this cpu supports. 
*/ -#define ELF_HWCAP (elf_hwcap) +#define ELF_HWCAP lower_32_bits(elf_hwcap) +#define ELF_HWCAP2 upper_32_bits(elf_hwcap) #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 5f0750c2199c..453b45af80b7 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -18,7 +18,7 @@ #define _UAPI__ASM_HWCAP_H /* - * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + * HWCAP flags - for AT_HWCAP */ #define HWCAP_FP (1 << 0) #define HWCAP_ASIMD (1 << 1) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4061de10cea6..986ceeacd19f 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1571,39 +1571,39 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT), + 
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_PTR_AUTH - HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, HWCAP_PACA), - HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, HWCAP_PACG), + HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), + 
HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif {}, }; @@ -1623,7 +1623,7 @@ static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: - elf_hwcap |= cap->hwcap; + cpu_set_feature(cap->hwcap); break; #ifdef CONFIG_COMPAT case CAP_COMPAT_HWCAP: @@ -1646,7 +1646,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) switch (cap->hwcap_type) { case CAP_HWCAP: - rc = (elf_hwcap & cap->hwcap) != 0; + rc = cpu_have_feature(cap->hwcap); break; #ifdef CONFIG_COMPAT case CAP_COMPAT_HWCAP: @@ -1667,7 +1667,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { /* We support emulation of accesses to CPU ID feature registers */ - elf_hwcap |= HWCAP_CPUID; + cpu_set_named_feature(CPUID); for (; hwcaps->matches; hwcaps++) if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps))) cap_set_elf_hwcap(hwcaps); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index ca0685f33900..810db95f293f 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -167,7 +167,7 @@ static int c_show(struct seq_file *m, void *v) #endif /* CONFIG_COMPAT */ } else { for (j = 0; hwcap_str[j]; j++) - if (elf_hwcap & (1 << j)) + if (cpu_have_feature(j)) seq_printf(m, " %s", hwcap_str[j]); } seq_puts(m, "\n"); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5ebe73b69961..735cf1f8b109 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1258,14 +1258,14 @@ static inline void fpsimd_hotplug_init(void) { } */ static int __init fpsimd_init(void) { - if (elf_hwcap & HWCAP_FP) { + if (cpu_have_named_feature(FP)) { fpsimd_pm_init(); fpsimd_hotplug_init(); } else { pr_notice("Floating-point is not implemented\n"); } - if (!(elf_hwcap & HWCAP_ASIMD)) + if (!cpu_have_named_feature(ASIMD)) pr_notice("Advanced SIMD is not implemented\n"); return sve_sysctl_init(); diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281ce..6cc8aff83805 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -833,7 +833,11 @@ static void arch_timer_evtstrm_enable(int divider) cntkctl |= (divider << ARCH_TIMER_EVT_TRIGGER_SHIFT) | ARCH_TIMER_VIRT_EVT_EN; arch_timer_set_cntkctl(cntkctl); +#ifdef CONFIG_ARM64 + cpu_set_named_feature(EVTSTRM); +#else elf_hwcap |= HWCAP_EVTSTRM; +#endif #ifdef CONFIG_COMPAT compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM; #endif @@ -1055,7 +1059,11 @@ static int arch_timer_cpu_pm_notify(struct notifier_block *self, } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) { arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl)); +#ifdef CONFIG_ARM64 + if (cpu_have_named_feature(EVTSTRM)) +#else if (elf_hwcap & HWCAP_EVTSTRM) +#endif cpumask_set_cpu(smp_processor_id(), &evtstrm_available); } return NOTIFY_OK; -- cgit v1.2.3-59-g8ed1b From aec0bff757c937489d003ab7b36c52e77e4b096a Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:41 +0100 Subject: arm64: HWCAP: encapsulate elf_hwcap The introduction of AT_HWCAP2 introduced accessors which ensure that hwcap features are set and tested appropriately. Let's now mandate access to elf_hwcap via these accessors by making elf_hwcap static within cpufeature.c. 
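The resulting in-kernel style can be sketched with a hypothetical consumer (hwcap_demo_init() is invented for illustration; cpu_have_named_feature(), cpu_get_elf_hwcap() and cpu_get_elf_hwcap2() are the accessors this series provides):

#include <linux/init.h>
#include <linux/printk.h>
#include <asm/cpufeature.h>
#include <asm/hwcap.h>

static int __init hwcap_demo_init(void)
{
	/* test a named feature rather than poking elf_hwcap directly */
	if (cpu_have_named_feature(ASIMD))
		pr_info("Advanced SIMD is present\n");

	/* the raw words userspace sees in the auxiliary vector */
	pr_info("AT_HWCAP=%#lx AT_HWCAP2=%#lx\n",
		cpu_get_elf_hwcap(), cpu_get_elf_hwcap2());

	return 0;
}
late_initcall(hwcap_demo_init);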
Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 15 ++++----------- arch/arm64/include/asm/hwcap.h | 7 +++---- arch/arm64/kernel/cpufeature.c | 33 +++++++++++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 347c17046668..a3f028f82def 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -392,19 +392,12 @@ extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS) bool this_cpu_has_cap(unsigned int cap); +void cpu_set_feature(unsigned int num); +bool cpu_have_feature(unsigned int num); +unsigned long cpu_get_elf_hwcap(void); +unsigned long cpu_get_elf_hwcap2(void); -static inline void cpu_set_feature(unsigned int num) -{ - WARN_ON(num >= MAX_CPU_FEATURES); - elf_hwcap |= BIT(num); -} #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name)) - -static inline bool cpu_have_feature(unsigned int num) -{ - WARN_ON(num >= MAX_CPU_FEATURES); - return elf_hwcap & BIT(num); -} #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) /* System capability check for constant caps */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index af868cbe96f8..a843b6efce5b 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -17,6 +17,7 @@ #define __ASM_HWCAP_H #include +#include #define COMPAT_HWCAP_HALF (1 << 1) #define COMPAT_HWCAP_THUMB (1 << 2) @@ -40,7 +41,6 @@ #define COMPAT_HWCAP2_CRC32 (1 << 4) #ifndef __ASSEMBLY__ -#include #include /* @@ -93,8 +93,8 @@ * This yields a mask that user programs can use to figure out what * instruction set this cpu supports. */ -#define ELF_HWCAP lower_32_bits(elf_hwcap) -#define ELF_HWCAP2 upper_32_bits(elf_hwcap) +#define ELF_HWCAP cpu_get_elf_hwcap() +#define ELF_HWCAP2 cpu_get_elf_hwcap2() #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) @@ -110,6 +110,5 @@ enum { #endif }; -extern unsigned long elf_hwcap; #endif #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 986ceeacd19f..a655d1bb1186 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -35,8 +35,8 @@ #include #include -unsigned long elf_hwcap __read_mostly; -EXPORT_SYMBOL_GPL(elf_hwcap); +/* Kernel representation of AT_HWCAP and AT_HWCAP2 */ +static unsigned long elf_hwcap __read_mostly; #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP_DEFAULT \ @@ -1947,6 +1947,35 @@ bool this_cpu_has_cap(unsigned int n) return false; } +void cpu_set_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + elf_hwcap |= BIT(num); +} +EXPORT_SYMBOL_GPL(cpu_set_feature); + +bool cpu_have_feature(unsigned int num) +{ + WARN_ON(num >= MAX_CPU_FEATURES); + return elf_hwcap & BIT(num); +} +EXPORT_SYMBOL_GPL(cpu_have_feature); + +unsigned long cpu_get_elf_hwcap(void) +{ + /* + * We currently only populate the first 32 bits of AT_HWCAP. Please + * note that for userspace compatibility we guarantee that bits 62 + * and 63 will always be returned as 0. 
+ */ + return lower_32_bits(elf_hwcap); +} + +unsigned long cpu_get_elf_hwcap2(void) +{ + return upper_32_bits(elf_hwcap); +} + static void __init setup_system_capabilities(void) { /* -- cgit v1.2.3-59-g8ed1b From d16ed4105f5bbd8a34f15053045a6657f4c52c6f Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:42 +0100 Subject: arm64: Handle trapped DC CVADP The ARMv8.5 DC CVADP instruction may be trapped to EL1 via SCTLR_EL1.UCI therefore let's provide a handler for it. Just like the CVAP instruction we use a 'sys' instruction instead of the 'dc' alias to avoid build issues with older toolchains. Signed-off-by: Andrew Murray Reviewed-by: Mark Rutland Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 3 ++- arch/arm64/kernel/traps.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 3541720189c9..0e27fe91d5ea 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -196,9 +196,10 @@ /* * User space cache operations have the following sysreg encoding * in System instructions. - * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0) + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 13, 14 }, WRITE (L=0) */ #define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVADP 13 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 #define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 74598396e0bf..21e73954762c 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -459,6 +459,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ __user_cache_maint("dc civac", address, ret); break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVADP: /* DC CVADP */ + __user_cache_maint("sys 3, c7, c13, 1", address, ret); + break; case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */ __user_cache_maint("sys 3, c7, c12, 1", address, ret); break; -- cgit v1.2.3-59-g8ed1b From 671db581815faf17cbedd7fcbc48823a247d90b1 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:43 +0100 Subject: arm64: Expose DC CVADP to userspace ARMv8.5 builds upon the ARMv8.2 DC CVAP instruction by introducing a DC CVADP instruction which cleans the data cache to the point of deep persistence. Let's expose this support via the arm64 ELF hwcaps. Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- Documentation/arm64/elf_hwcaps.txt | 4 ++++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 5 +++++ arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/cpuinfo.c | 1 + 5 files changed, 12 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 11805777031e..55431fd2a67a 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -135,6 +135,10 @@ HWCAP_DCPOP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001. +HWCAP2_DCPODP + + Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. + HWCAP_SHA3 Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001. 
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index a843b6efce5b..f78c86c64e68 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -88,6 +88,7 @@ #define KERNEL_HWCAP_PACG __khwcap_feature(PACG) #define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) +#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 453b45af80b7..d64af3913a9e 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -53,4 +53,9 @@ #define HWCAP_PACA (1 << 30) #define HWCAP_PACG (1UL << 31) +/* + * HWCAP2 flags - for AT_HWCAP2 + */ +#define HWCAP2_DCPODP (1 << 0) + #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a655d1bb1186..f8b682a3a9f4 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1591,6 +1591,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 810db95f293f..093ca53ce1d1 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -85,6 +85,7 @@ static const char *const hwcap_str[] = { "sb", "paca", "pacg", + "dcpodp", NULL }; -- cgit v1.2.3-59-g8ed1b From 04a1438e5660ae44ecebd6c870fbcc140dd883a7 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:44 +0100 Subject: arm64: add CVADP support to the cache maintenance helper Allow users of dcache_by_line_op to specify cvadp as an op. Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 621212196871..039fbd822ec6 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -407,10 +407,14 @@ alternative_endif .ifc \op, cvap sys 3, c7, c12, 1, \kaddr // dc cvap .else + .ifc \op, cvadp + sys 3, c7, c13, 1, \kaddr // dc cvadp + .else dc \op, \kaddr .endif .endif .endif + .endif add \kaddr, \kaddr, \tmp1 cmp \kaddr, \size b.lo 9998b -- cgit v1.2.3-59-g8ed1b From b9585f53bcf1ada1fbac22c15fd723c65ef67ff1 Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Tue, 9 Apr 2019 10:52:45 +0100 Subject: arm64: Advertise ARM64_HAS_DCPODP cpu feature Advertise ARM64_HAS_DCPODP when both DC CVAP and DC CVADP are supported. Even though we don't use this feature now, we provide it for consistency with DCPOP and anticipate it being used in the future. 
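A hedged sketch of how a later in-kernel user might key off the new capability (the helper name is invented; only cpus_have_const_cap() and ARM64_HAS_DCPODP come from the kernel):

#include <linux/types.h>
#include <asm/cpufeature.h>

/* true once all CPUs can clean data to the point of deep persistence */
static bool system_has_dcpodp(void)
{
	return cpus_have_const_cap(ARM64_HAS_DCPODP);
}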
Signed-off-by: Andrew Murray Reviewed-by: Dave Martin Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpufeature.c | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f6a76e43f39e..defdc67d9ab4 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -61,7 +61,8 @@ #define ARM64_HAS_GENERIC_AUTH_ARCH 40 #define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 #define ARM64_HAS_IRQ_PRIO_MASKING 42 +#define ARM64_HAS_DCPODP 43 -#define ARM64_NCAPS 43 +#define ARM64_NCAPS 44 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f8b682a3a9f4..9d18e45311fd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1340,6 +1340,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR1_DPB_SHIFT, .min_field_value = 1, }, + { + .desc = "Data cache clean to Point of Deep Persistence", + .capability = ARM64_HAS_DCPODP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .min_field_value = 2, + }, #endif #ifdef CONFIG_ARM64_SVE { -- cgit v1.2.3-59-g8ed1b From eea1bb2248691ed65c33daff94a253af44feb103 Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Tue, 16 Apr 2019 01:36:36 +0800 Subject: arm64: mm: check virtual addr in virt_to_page() if CONFIG_DEBUG_VIRTUAL=y This change uses the original virt_to_page() (the one with __pa()) to check the given virtual address if CONFIG_DEBUG_VIRTUAL=y. Recently, I worked on a bug: a driver passes a symbol address to dma_map_single() and the virt_to_page() (called by dma_map_single()) does not work for non-linear addresses after commit 9f2875912dac ("arm64: mm: restrict virt_to_page() to the linear mapping"). I tried to trap the bug by enabling CONFIG_DEBUG_VIRTUAL but it did not work - because the commit removes the __pa() from virt_to_page(), while CONFIG_DEBUG_VIRTUAL checks the virtual address in __pa()/__virt_to_phys(). A simple solution is to use the original virt_to_page() (the one with __pa()) if CONFIG_DEBUG_VIRTUAL=y. Cc: Ard Biesheuvel Acked-by: Catalin Marinas Signed-off-by: Miles Chen Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 290195168bb3..2cb8248fa2c8 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -302,7 +302,7 @@ static inline void *phys_to_virt(phys_addr_t x) */ #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) -#ifndef CONFIG_SPARSEMEM_VMEMMAP +#if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) #define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else -- cgit v1.2.3-59-g8ed1b From 131e135f7fd14b1de7a5eb26631076705c18073f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 9 Apr 2019 12:13:13 +0100 Subject: arm64: instrument smp_{load_acquire,store_release} Our __smp_store_release() and __smp_load_acquire() macros use inline assembly, which is opaque to kasan. This means that kasan can't catch erroneous use of these. This patch adds kasan instrumentation to both.
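A contrived example of the class of bug that becomes visible to KASAN with this change (illustrative only, not taken from the patch):

#include <linux/errno.h>
#include <linux/slab.h>
#include <asm/barrier.h>

static int load_after_free(void)
{
	int *p = kmalloc(sizeof(*p), GFP_KERNEL);
	int val;

	if (!p)
		return -ENOMEM;

	smp_store_release(p, 1);
	kfree(p);

	/* use-after-free: previously opaque to KASAN, now reported */
	val = smp_load_acquire(p);
	return val;
}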
Cc: Catalin Marinas Signed-off-by: Mark Rutland [will: consistently use *p as argument to sizeof] Signed-off-by: Will Deacon --- arch/arm64/include/asm/barrier.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index f66bb04fdf2d..85b6bedbcc68 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -20,6 +20,8 @@ #ifndef __ASSEMBLY__ +#include + #define __nops(n) ".rept " #n "\nnop\n.endr\n" #define nops(n) asm volatile(__nops(n)) @@ -72,31 +74,33 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, #define __smp_store_release(p, v) \ do { \ + typeof(p) __p = (p); \ union { typeof(*p) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(*p)) (v) }; \ + { .__val = (__force typeof(*p)) (v) }; \ compiletime_assert_atomic_type(*p); \ + kasan_check_write(__p, sizeof(*p)); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u8 *)__u.__c) \ : "memory"); \ break; \ case 2: \ asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u16 *)__u.__c) \ : "memory"); \ break; \ case 4: \ asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u32 *)__u.__c) \ : "memory"); \ break; \ case 8: \ asm volatile ("stlr %1, %0" \ - : "=Q" (*p) \ + : "=Q" (*__p) \ : "r" (*(__u64 *)__u.__c) \ : "memory"); \ break; \ @@ -106,27 +110,29 @@ do { \ #define __smp_load_acquire(p) \ ({ \ union { typeof(*p) __val; char __c[1]; } __u; \ + typeof(p) __p = (p); \ compiletime_assert_atomic_type(*p); \ + kasan_check_read(__p, sizeof(*p)); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("ldarb %w0, %1" \ : "=r" (*(__u8 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ case 2: \ asm volatile ("ldarh %w0, %1" \ : "=r" (*(__u16 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ case 4: \ asm volatile ("ldar %w0, %1" \ : "=r" (*(__u32 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ case 8: \ asm volatile ("ldar %0, %1" \ : "=r" (*(__u64 *)__u.__c) \ - : "Q" (*p) : "memory"); \ + : "Q" (*__p) : "memory"); \ break; \ } \ __u.__val; \ -- cgit v1.2.3-59-g8ed1b From 22e6c8087e175bc5c507b4e45d1ca588b2bcc61c Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Tue, 16 Apr 2019 20:49:35 +0530 Subject: arm64: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in the arm64 Hardware Architecture related files. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: Will Deacon --- arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h | 2 +- arch/arm64/include/asm/pointer_auth.h | 2 +- arch/arm64/include/asm/sdei.h | 2 +- arch/arm64/include/asm/vmap_stack.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h b/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h index 1b4cb0c55744..385c455a7c98 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h +++ b/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2018 MediaTek Inc. 
* Author: Zhiyong Tao diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index 15d49515efdd..d328540cb85e 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_POINTER_AUTH_H #define __ASM_POINTER_AUTH_H diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index ffe47d766c25..63e0b92a5fbb 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2017 Arm Ltd. #ifndef __ASM_SDEI_H #define __ASM_SDEI_H diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h index 0b5ec6e08c10..0a12115d9638 100644 --- a/arch/arm64/include/asm/vmap_stack.h +++ b/arch/arm64/include/asm/vmap_stack.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ // Copyright (C) 2017 Arm Ltd. #ifndef __ASM_VMAP_STACK_H #define __ASM_VMAP_STACK_H -- cgit v1.2.3-59-g8ed1b From 81fb8736dd81da3fe94f28968dac60f392ec6746 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Tue, 16 Apr 2019 17:14:30 +0100 Subject: arm64: vdso: Fix clock_getres() for CLOCK_REALTIME clock_getres() in the vDSO library has to preserve the same behaviour of posix_get_hrtimer_res(). In particular, posix_get_hrtimer_res() does: sec = 0; ns = hrtimer_resolution; where 'hrtimer_resolution' depends on whether or not high resolution timers are enabled, which is a runtime decision. The vDSO incorrectly returns the constant CLOCK_REALTIME_RES. Fix this by exposing 'hrtimer_resolution' in the vDSO datapage and returning that instead. 
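A minimal userspace check of the behaviour being fixed (illustrative, not part of the patch); with this change the vDSO reports the runtime hrtimer_resolution, i.e. 1 ns when high resolution timers are active and the tick period otherwise, matching the syscall fallback instead of a compile-time constant:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec res;

	if (clock_getres(CLOCK_REALTIME, &res) != 0) {
		perror("clock_getres");
		return 1;
	}

	printf("CLOCK_REALTIME resolution: %ld ns\n", res.tv_nsec);
	return 0;
}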
Reviewed-by: Catalin Marinas Signed-off-by: Vincenzo Frascino [will: Use WRITE_ONCE(), move adr off COARSE path, renumber labels, use 'w' reg] Signed-off-by: Will Deacon --- arch/arm64/include/asm/vdso_datapage.h | 1 + arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/vdso.c | 3 +++ arch/arm64/kernel/vdso/gettimeofday.S | 7 +++---- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index 2b9a63771eda..f89263c8e11a 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -38,6 +38,7 @@ struct vdso_data { __u32 tz_minuteswest; /* Whacky timezone stuff */ __u32 tz_dsttime; __u32 use_syscall; + __u32 hrtimer_res; }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7f40dcbdd51d..e10e2a5d9ddc 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -94,7 +94,7 @@ int main(void) DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); - DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); + DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res)); DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 9fb0c0c95a85..42b7082029e1 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -230,6 +230,9 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; + /* Read without the seqlock held by clock_getres() */ + WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); + if (!use_syscall) { /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index c39872a7b03c..21805e416483 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -301,13 +301,14 @@ ENTRY(__kernel_clock_getres) ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne b.ne 1f - ldr x2, 5f + adr vdso_data, _vdso_data + ldr w2, [vdso_data, #CLOCK_REALTIME_RES] b 2f 1: cmp w0, #CLOCK_REALTIME_COARSE ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 4f - ldr x2, 6f + ldr x2, 5f 2: cbz x1, 3f stp xzr, x2, [x1] @@ -321,8 +322,6 @@ ENTRY(__kernel_clock_getres) svc #0 ret 5: - .quad CLOCK_REALTIME_RES -6: .quad CLOCK_COARSE_RES .cfi_endproc ENDPROC(__kernel_clock_getres) -- cgit v1.2.3-59-g8ed1b From 36a2ba07757df790b4a874efb1a105b9330a9ae7 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 8 Apr 2019 23:21:12 +0800 Subject: ACPI/IORT: Reject platform device creation on NUMA node mapping failure In a system where, through IORT firmware mappings, the SMMU device is mapped to a NUMA node that is not online, the kernel bootstrap results in the following crash: Unable to handle kernel paging request at virtual address 0000000000001388 Mem abort info: ESR = 0x96000004 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 [0000000000001388] user address but active_mm is swapper Internal error: Oops: 96000004 [#1] SMP Modules linked in: CPU: 5 PID: 1 Comm: swapper/0 Not tainted 5.0.0 #15 pstate: 80c00009 (Nzcv 
daif +PAN +UAO) pc : __alloc_pages_nodemask+0x13c/0x1068 lr : __alloc_pages_nodemask+0xdc/0x1068 ... Process swapper/0 (pid: 1, stack limit = 0x(____ptrval____)) Call trace: __alloc_pages_nodemask+0x13c/0x1068 new_slab+0xec/0x570 ___slab_alloc+0x3e0/0x4f8 __slab_alloc+0x60/0x80 __kmalloc_node_track_caller+0x10c/0x478 devm_kmalloc+0x44/0xb0 pinctrl_bind_pins+0x4c/0x188 really_probe+0x78/0x2b8 driver_probe_device+0x64/0x110 device_driver_attach+0x74/0x98 __driver_attach+0x9c/0xe8 bus_for_each_dev+0x84/0xd8 driver_attach+0x30/0x40 bus_add_driver+0x170/0x218 driver_register+0x64/0x118 __platform_driver_register+0x54/0x60 arm_smmu_driver_init+0x24/0x2c do_one_initcall+0xbc/0x328 kernel_init_freeable+0x304/0x3ac kernel_init+0x18/0x110 ret_from_fork+0x10/0x1c Code: f90013b5 b9410fa1 1a9f0694 b50014c2 (b9400804) ---[ end trace dfeaed4c373a32da ]-- Change the dev_set_proximity() hook prototype so that it returns a value and make it return failure if the PXM->NUMA-node mapping corresponds to an offline node, fixing the crash. Acked-by: Lorenzo Pieralisi Signed-off-by: Kefeng Wang Link: https://lore.kernel.org/linux-arm-kernel/20190315021940.86905-1-wangkefeng.wang@huawei.com/ Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e48894e002ba..a46c2c162c03 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1232,18 +1232,24 @@ static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node) /* * set numa proximity domain for smmuv3 device */ -static void __init arm_smmu_v3_set_proximity(struct device *dev, +static int __init arm_smmu_v3_set_proximity(struct device *dev, struct acpi_iort_node *node) { struct acpi_iort_smmu_v3 *smmu; smmu = (struct acpi_iort_smmu_v3 *)node->node_data; if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) { - set_dev_node(dev, acpi_map_pxm_to_node(smmu->pxm)); + int node = acpi_map_pxm_to_node(smmu->pxm); + + if (node != NUMA_NO_NODE && !node_online(node)) + return -EINVAL; + + set_dev_node(dev, node); pr_info("SMMU-v3[%llx] Mapped to Proximity domain %d\n", smmu->base_address, smmu->pxm); } + return 0; } #else #define arm_smmu_v3_set_proximity NULL @@ -1318,7 +1324,7 @@ struct iort_dev_config { int (*dev_count_resources)(struct acpi_iort_node *node); void (*dev_init_resources)(struct resource *res, struct acpi_iort_node *node); - void (*dev_set_proximity)(struct device *dev, + int (*dev_set_proximity)(struct device *dev, struct acpi_iort_node *node); }; @@ -1369,8 +1375,11 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node, if (!pdev) return -ENOMEM; - if (ops->dev_set_proximity) - ops->dev_set_proximity(&pdev->dev, node); + if (ops->dev_set_proximity) { + ret = ops->dev_set_proximity(&pdev->dev, node); + if (ret) + goto dev_put; + } count = ops->dev_count_resources(node); -- cgit v1.2.3-59-g8ed1b From 0d2e2a82d4de298d006bf8eddc86829e3c7da820 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Apr 2019 16:24:24 +0100 Subject: perf/arm-cci: Remove broken race mitigation Uncore PMU drivers face an awkward cyclic dependency wherein: - They have to pick a valid online CPU to associate with before registering the PMU device, since it will get exposed to userspace immediately. - The PMU registration has to be be at least partly complete before hotplug events can be handled, since trying to migrate an uninitialised context would be bad. 
- The hotplug handler has to be ready as soon as a CPU is chosen, lest it go offline without the user-visible cpumask value getting updated. The arm-cci driver has tried to solve this by using get_cpu() to pick the current CPU and prevent it from disappearing while both registrations are performed, but that results in taking mutexes with preemption disabled, which makes certain configurations very unhappy: [ 1.983337] BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:2004 [ 1.983340] in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: swapper/0 [ 1.983342] Preemption disabled at: [ 1.983353] [] cci_pmu_probe+0x1dc/0x488 [ 1.983360] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.20-rt8-yocto-preempt-rt #1 [ 1.983362] Hardware name: ZynqMP ZCU102 Rev1.0 (DT) [ 1.983364] Call trace: [ 1.983369] dump_backtrace+0x0/0x158 [ 1.983372] show_stack+0x24/0x30 [ 1.983378] dump_stack+0x80/0xa4 [ 1.983383] ___might_sleep+0x138/0x160 [ 1.983386] __might_sleep+0x58/0x90 [ 1.983391] __rt_mutex_lock_state+0x30/0xc0 [ 1.983395] _mutex_lock+0x24/0x30 [ 1.983400] perf_pmu_register+0x2c/0x388 [ 1.983404] cci_pmu_probe+0x2bc/0x488 [ 1.983409] platform_drv_probe+0x58/0xa8 It is not feasible to resolve all the possible races outside of the perf core itself, so address the immediate bug by following the example of nearly every other PMU driver and not even trying to do so. Registering the hotplug notifier first should minimise the window in which things can go wrong, so that's about as much as we can reasonably do here. This also revealed an additional race in assigning the global pointer too late relative to the hotplug notifier, which gets fixed in the process. Reported-by: Li, Meng Tested-by: Corentin Labbe Reviewed-by: Suzuki K Poulose Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index bfd03e023308..8f8606b9bc9e 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1684,21 +1684,24 @@ static int cci_pmu_probe(struct platform_device *pdev) raw_spin_lock_init(&cci_pmu->hw_events.pmu_lock); mutex_init(&cci_pmu->reserve_mutex); atomic_set(&cci_pmu->active_events, 0); - cci_pmu->cpu = get_cpu(); - - ret = cci_pmu_init(cci_pmu, pdev); - if (ret) { - put_cpu(); - return ret; - } + cci_pmu->cpu = raw_smp_processor_id(); + g_cci_pmu = cci_pmu; cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE, "perf/arm/cci:online", NULL, cci_pmu_offline_cpu); - put_cpu(); - g_cci_pmu = cci_pmu; + + ret = cci_pmu_init(cci_pmu, pdev); + if (ret) + goto error_pmu_init; + pr_info("ARM %s PMU driver probed", cci_pmu->model->name); return 0; + +error_pmu_init: + cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE); + g_cci_pmu = NULL; + return ret; } static int cci_pmu_remove(struct platform_device *pdev) -- cgit v1.2.3-59-g8ed1b From 9bcb929f969e4054732158908b1d70e787ef780f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Apr 2019 16:24:25 +0100 Subject: perf/arm-ccn: Clean up CPU hotplug handling Like arm-cci, arm-ccn has the same issue of disabling preemption around operations which can take mutexes. Again, remove the definite bug by simply not trying to fight the theoretical races. And since we are touching the hotplug handling code, take the opportunity to streamline it, as there's really no need to store a full-sized cpumask to keep track of a single CPU ID. 
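Both drivers converge on the same probe-time ordering, sketched here for a hypothetical uncore PMU (foo_pmu and its fields are invented, the CCN hotplug state is borrowed purely for illustration, and the struct pmu callbacks are assumed to be filled in elsewhere):

#include <linux/cpuhotplug.h>
#include <linux/perf_event.h>
#include <linux/platform_device.h>
#include <linux/smp.h>

struct foo_pmu {
	struct pmu pmu;
	struct hlist_node node;
	unsigned int cpu;
};

static int foo_pmu_probe(struct platform_device *pdev)
{
	struct foo_pmu *fpmu;
	int ret;

	fpmu = devm_kzalloc(&pdev->dev, sizeof(*fpmu), GFP_KERNEL);
	if (!fpmu)
		return -ENOMEM;

	/* no get_cpu(): snapshot a CPU and let the hotplug callback track it */
	fpmu->cpu = raw_smp_processor_id();

	/* register the hotplug instance before the PMU becomes user-visible */
	cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
					 &fpmu->node);

	ret = perf_pmu_register(&fpmu->pmu, "foo_pmu", -1);
	if (ret)
		cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
						    &fpmu->node);
	return ret;
}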
Reviewed-by: Suzuki K Poulose Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 2ae76026e947..0bb52d9bdcf7 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -167,7 +167,7 @@ struct arm_ccn_dt { struct hrtimer hrtimer; - cpumask_t cpu; + unsigned int cpu; struct hlist_node node; struct pmu pmu; @@ -559,7 +559,7 @@ static ssize_t arm_ccn_pmu_cpumask_show(struct device *dev, { struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev)); - return cpumap_print_to_pagebuf(true, buf, &ccn->dt.cpu); + return cpumap_print_to_pagebuf(true, buf, cpumask_of(ccn->dt.cpu)); } static struct device_attribute arm_ccn_pmu_cpumask_attr = @@ -759,7 +759,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) * mitigate this, we enforce CPU assignment to one, selected * processor (the one described in the "cpumask" attribute). */ - event->cpu = cpumask_first(&ccn->dt.cpu); + event->cpu = ccn->dt.cpu; node_xp = CCN_CONFIG_NODE(event->attr.config); type = CCN_CONFIG_TYPE(event->attr.config); @@ -1215,15 +1215,15 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt); unsigned int target; - if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu)) + if (cpu != dt->cpu) return 0; target = cpumask_any_but(cpu_online_mask, cpu); if (target >= nr_cpu_ids) return 0; perf_pmu_migrate_context(&dt->pmu, cpu, target); - cpumask_set_cpu(target, &dt->cpu); + dt->cpu = target; if (ccn->irq) - WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0); + WARN_ON(irq_set_affinity_hint(ccn->irq, cpumask_of(dt->cpu))); return 0; } @@ -1299,29 +1299,30 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn) } /* Pick one CPU which we will use to collect data from CCN... */ - cpumask_set_cpu(get_cpu(), &ccn->dt.cpu); + ccn->dt.cpu = raw_smp_processor_id(); /* Also make sure that the overflow interrupt is handled by this CPU */ if (ccn->irq) { - err = irq_set_affinity_hint(ccn->irq, &ccn->dt.cpu); + err = irq_set_affinity_hint(ccn->irq, cpumask_of(ccn->dt.cpu)); if (err) { dev_err(ccn->dev, "Failed to set interrupt affinity!\n"); goto error_set_affinity; } } + cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, + &ccn->dt.node); + err = perf_pmu_register(&ccn->dt.pmu, name, -1); if (err) goto error_pmu_register; - cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, - &ccn->dt.node); - put_cpu(); return 0; error_pmu_register: + cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE, + &ccn->dt.node); error_set_affinity: - put_cpu(); error_choose_name: ida_simple_remove(&arm_ccn_pmu_ida, ccn->dt.id); for (i = 0; i < ccn->num_xps; i++) -- cgit v1.2.3-59-g8ed1b From 0d747f6585954d7285a3995058e7dfeae7236cf9 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Apr 2019 10:49:34 +0100 Subject: arm64: compat: Alloc separate pages for vectors and sigpage For AArch32 tasks, we install a special "[vectors]" page that contains the sigreturn trampolines and kuser helpers, which is mapped at a fixed address specified by the kuser helpers ABI. Having the sigreturn trampolines in the same page as the kuser helpers makes it impossible to disable the kuser helpers independently. Follow the Arm implementation, by moving the signal trampolines out of the "[vectors]" page and into their own "[sigpage]". 
Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas [will: tweaked comments and fixed sparse warning] Signed-off-by: Will Deacon --- arch/arm64/include/asm/elf.h | 6 +- arch/arm64/include/asm/signal32.h | 2 - arch/arm64/kernel/signal32.c | 3 +- arch/arm64/kernel/vdso.c | 116 +++++++++++++++++++++++++++++--------- 4 files changed, 93 insertions(+), 34 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 6adc1a90e7e6..355d120b78cb 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -214,10 +214,10 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; set_thread_flag(TIF_32BIT); \ }) #define COMPAT_ARCH_DLINFO -extern int aarch32_setup_vectors_page(struct linux_binprm *bprm, - int uses_interp); +extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); #define compat_arch_setup_additional_pages \ - aarch32_setup_vectors_page + aarch32_setup_additional_pages #endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index 81abea0b7650..58e288aaf0ba 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -20,8 +20,6 @@ #ifdef CONFIG_COMPAT #include -#define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500 - int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs); int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index cb7800acd19f..caea6e25db2a 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -403,8 +403,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, if (ka->sa.sa_flags & SA_SIGINFO) idx += 3; - retcode = AARCH32_VECTORS_BASE + - AARCH32_KERN_SIGRET_CODE_OFFSET + + retcode = (unsigned long)current->mm->context.vdso + (idx << 2) + thumb; } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 42b7082029e1..6bb7038dc96c 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -1,5 +1,5 @@ /* - * VDSO implementation for AArch64 and vector page setup for AArch32. + * VDSO implementations. * * Copyright (C) 2012 ARM Limited * @@ -53,61 +53,123 @@ struct vdso_data *vdso_data = &vdso_data_store.data; /* * Create and map the vectors page for AArch32 tasks. 
*/ -static struct page *vectors_page[1] __ro_after_init; +#define C_VECTORS 0 +#define C_SIGPAGE 1 +#define C_PAGES (C_SIGPAGE + 1) +static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init; +static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { + { + .name = "[vectors]", /* ABI */ + .pages = &aarch32_vdso_pages[C_VECTORS], + }, + { + .name = "[sigpage]", /* ABI */ + .pages = &aarch32_vdso_pages[C_SIGPAGE], + }, +}; -static int __init alloc_vectors_page(void) +static int __init aarch32_alloc_vdso_pages(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; int kuser_sz = __kuser_helper_end - __kuser_helper_start; int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long vpage; + unsigned long vdso_pages[C_PAGES]; - vpage = get_zeroed_page(GFP_ATOMIC); + vdso_pages[C_VECTORS] = get_zeroed_page(GFP_ATOMIC); + if (!vdso_pages[C_VECTORS]) + return -ENOMEM; - if (!vpage) + vdso_pages[C_SIGPAGE] = get_zeroed_page(GFP_ATOMIC); + if (!vdso_pages[C_SIGPAGE]) { + free_page(vdso_pages[C_VECTORS]); return -ENOMEM; + } /* kuser helpers */ - memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start, - kuser_sz); + memcpy((void *)(vdso_pages[C_VECTORS] + 0x1000 - kuser_sz), + __kuser_helper_start, + kuser_sz); /* sigreturn code */ - memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - __aarch32_sigret_code_start, sigret_sz); + memcpy((void *)vdso_pages[C_SIGPAGE], __aarch32_sigret_code_start, + sigret_sz); + + flush_icache_range(vdso_pages[C_VECTORS], + vdso_pages[C_VECTORS] + PAGE_SIZE); + flush_icache_range(vdso_pages[C_SIGPAGE], + vdso_pages[C_SIGPAGE] + PAGE_SIZE); - flush_icache_range(vpage, vpage + PAGE_SIZE); - vectors_page[0] = virt_to_page(vpage); + aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_pages[C_VECTORS]); + aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(vdso_pages[C_SIGPAGE]); return 0; } -arch_initcall(alloc_vectors_page); +arch_initcall(aarch32_alloc_vdso_pages); -int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) +static int aarch32_kuser_helpers_setup(struct mm_struct *mm) { - struct mm_struct *mm = current->mm; - unsigned long addr = AARCH32_VECTORS_BASE; - static const struct vm_special_mapping spec = { - .name = "[vectors]", - .pages = vectors_page, + void *ret; + + /* + * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's + * not safe to CoW the page containing the CPU exception vectors. + */ + ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, + VM_READ | VM_EXEC | + VM_MAYREAD | VM_MAYEXEC, + &aarch32_vdso_spec[C_VECTORS]); + + return PTR_ERR_OR_ZERO(ret); +} - }; +static int aarch32_sigreturn_setup(struct mm_struct *mm) +{ + unsigned long addr; void *ret; - if (down_write_killable(&mm->mmap_sem)) - return -EINTR; - current->mm->context.vdso = (void *)addr; + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = ERR_PTR(addr); + goto out; + } - /* Map vectors page at the high address. */ + /* + * VM_MAYWRITE is required to allow gdb to Copy-on-Write and + * set breakpoints. 
+ */ ret = _install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, - &spec); + VM_READ | VM_EXEC | VM_MAYREAD | + VM_MAYWRITE | VM_MAYEXEC, + &aarch32_vdso_spec[C_SIGPAGE]); + if (IS_ERR(ret)) + goto out; - up_write(&mm->mmap_sem); + mm->context.vdso = (void *)addr; +out: return PTR_ERR_OR_ZERO(ret); } + +int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + int ret; + + if (down_write_killable(&mm->mmap_sem)) + return -EINTR; + + ret = aarch32_kuser_helpers_setup(mm); + if (ret) + goto out; + + ret = aarch32_sigreturn_setup(mm); + +out: + up_write(&mm->mmap_sem); + return ret; +} #endif /* CONFIG_COMPAT */ static int vdso_mremap(const struct vm_special_mapping *sm, -- cgit v1.2.3-59-g8ed1b From d1e5ca64d5bab864000566ea695617d0e124d27e Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Apr 2019 10:49:35 +0100 Subject: arm64: compat: Split kuser32 To make it possible to disable kuser helpers in aarch32 we need to divide the kuser and the sigreturn functionalities. Split the current version of kuser32 in kuser32 (for kuser helpers) and sigreturn32 (for sigreturn helpers). Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/kuser32.S | 59 +++-------------------------------------- arch/arm64/kernel/sigreturn32.S | 46 ++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 57 deletions(-) create mode 100644 arch/arm64/kernel/sigreturn32.S diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index a30bbecb23d8..c18c34d56029 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -28,7 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o + sigreturn32.o sys_compat.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 997e6b27ff6a..c5f2bbafd723 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* - * Low-level user helpers placed in the vectors page for AArch32. + * AArch32 user helpers. * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. * * Copyright (C) 2005-2011 Nicolas Pitre - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - * - * AArch32 user helpers. + * Copyright (C) 2012-2018 ARM Ltd. * * Each segment is 32-byte aligned and will be moved to the top of the high * vector page. 
New segments (if ever needed) must be added in front of @@ -77,42 +63,3 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: - -/* - * AArch32 sigreturn code - * - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - * - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ - .globl __aarch32_sigret_code_start -__aarch32_sigret_code_start: - - /* - * ARM Code - */ - .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn - .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn - - /* - * ARM code - */ - .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn - - /* - * Thumb code - */ - .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn - .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn - - .globl __aarch32_sigret_code_end -__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S new file mode 100644 index 000000000000..475d30d471ac --- /dev/null +++ b/arch/arm64/kernel/sigreturn32.S @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AArch32 sigreturn code. + * Based on the kuser helpers in arch/arm/kernel/entry-armv.S. + * + * Copyright (C) 2005-2011 Nicolas Pitre + * Copyright (C) 2012-2018 ARM Ltd. + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ + +#include + + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM Code + */ + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn + + /* + * ARM code + */ + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: -- cgit v1.2.3-59-g8ed1b From 1255a7341bee6cd96df056ed0c13e44b59023687 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Apr 2019 10:49:36 +0100 Subject: arm64: compat: Refactor aarch32_alloc_vdso_pages() aarch32_alloc_vdso_pages() needs to be refactored to make it easier to disable kuser helpers. Divide the function in aarch32_alloc_kuser_vdso_page() and aarch32_alloc_sigreturn_vdso_page(). 
Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas [will: Inlined sigpage allocation to simplify error paths] Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 52 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 6bb7038dc96c..41f4d75bbc14 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -68,43 +68,43 @@ static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { }, }; -static int __init aarch32_alloc_vdso_pages(void) +static int aarch32_alloc_kuser_vdso_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; - extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; - int kuser_sz = __kuser_helper_end - __kuser_helper_start; - int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; - unsigned long vdso_pages[C_PAGES]; - - vdso_pages[C_VECTORS] = get_zeroed_page(GFP_ATOMIC); - if (!vdso_pages[C_VECTORS]) - return -ENOMEM; + unsigned long vdso_page; - vdso_pages[C_SIGPAGE] = get_zeroed_page(GFP_ATOMIC); - if (!vdso_pages[C_SIGPAGE]) { - free_page(vdso_pages[C_VECTORS]); + vdso_page = get_zeroed_page(GFP_ATOMIC); + if (!vdso_page) return -ENOMEM; - } - /* kuser helpers */ - memcpy((void *)(vdso_pages[C_VECTORS] + 0x1000 - kuser_sz), - __kuser_helper_start, + memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); + aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page); + flush_dcache_page(aarch32_vdso_pages[C_VECTORS]); + return 0; +} - /* sigreturn code */ - memcpy((void *)vdso_pages[C_SIGPAGE], __aarch32_sigret_code_start, - sigret_sz); +static int __init aarch32_alloc_vdso_pages(void) +{ + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; + unsigned long sigpage; + int ret; - flush_icache_range(vdso_pages[C_VECTORS], - vdso_pages[C_VECTORS] + PAGE_SIZE); - flush_icache_range(vdso_pages[C_SIGPAGE], - vdso_pages[C_SIGPAGE] + PAGE_SIZE); + sigpage = get_zeroed_page(GFP_ATOMIC); + if (!sigpage) + return -ENOMEM; - aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_pages[C_VECTORS]); - aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(vdso_pages[C_SIGPAGE]); + memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); + aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage); + flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]); - return 0; + ret = aarch32_alloc_kuser_vdso_page(); + if (ret) + free_page(sigpage); + + return ret; } arch_initcall(aarch32_alloc_vdso_pages); -- cgit v1.2.3-59-g8ed1b From af1b3cf2c2a3f42bbb680812ff1bbd715ac8af69 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 15 Apr 2019 10:49:37 +0100 Subject: arm64: compat: Add KUSER_HELPERS config option When kuser helpers are enabled the kernel maps the relative code at a fixed address (0xffff0000). Making configurable the option to disable them means that the kernel can remove this mapping and any access to this memory area results in a sigfault. Add a KUSER_HELPERS config option that can be used to disable the mapping when it is turned off. This option can be turned off if and only if the applications are designed specifically for the platform and they do not make use of the kuser helpers code. 
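As an illustration (not part of this patch), the sketch below shows the
sort of legacy AArch32 code that relies on the fixed mapping; it follows
the usage example in Documentation/arm/kernel_user_helpers.txt and will
fault if this option is turned off:

    typedef int (__kuser_cmpxchg_t)(int oldval, int newval,
                                    volatile int *ptr);
    #define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)

    /* atomic add built on the cmpxchg helper at 0xffff0fc0 */
    static int atomic_add(volatile int *ptr, int val)
    {
        int old, new;

        do {
            old = *ptr;
            new = old + val;
        } while (__kuser_cmpxchg(old, new, ptr)); /* returns 0 on success */

        return new;
    }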
Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas [will: Use IS_ENABLED() instead of #ifdef] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 28 ++++++++++++++++++++++++++++ arch/arm64/kernel/Makefile | 3 ++- arch/arm64/kernel/kuser32.S | 7 +++---- arch/arm64/kernel/vdso.c | 6 ++++++ 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c383625ec02c..0861a6f5ee4a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1498,6 +1498,34 @@ config COMPAT If you want to execute 32-bit userspace applications, say Y. +config KUSER_HELPERS + bool "Enable kuser helpers page for 32 bit applications." + depends on COMPAT + default y + help + Warning: disabling this option may break 32-bit user programs. + + Provide kuser helpers to compat tasks. The kernel provides + helper code to userspace in read only form at a fixed location + to allow userspace to be independent of the CPU type fitted to + the system. This permits binaries to be run on ARMv4 through + to ARMv8 without modification. + + See Documentation/arm/kernel_user_helpers.txt for details. + + However, the fixed address nature of these helpers can be used + by ROP (return orientated programming) authors when creating + exploits. + + If all of the binaries and libraries which run on your platform + are built specifically for your platform, and make no use of + these helpers, then you can turn this option off to hinder + such exploits. However, in that case, if a binary or library + relying on those helpers is run, it will not function correctly. + + Say N here only if you are absolutely certain that you do not + need these helpers; otherwise, the safe option is to say Y. + config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c18c34d56029..9e7dcb2c31c7 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -27,8 +27,9 @@ OBJCOPYFLAGS := --prefix-symbols=__efistub_ $(obj)/%.stub.o: $(obj)/%.o FORCE $(call if_changed,objcopy) -obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ +obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sigreturn32.o sys_compat.o +obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index c5f2bbafd723..49825e9e421e 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -6,10 +6,9 @@ * Copyright (C) 2005-2011 Nicolas Pitre * Copyright (C) 2012-2018 ARM Ltd. * - * Each segment is 32-byte aligned and will be moved to the top of the high - * vector page. New segments (if ever needed) must be added in front of - * existing ones. This mechanism should be used only for things that are - * really small and justified, and not be abused freely. + * The kuser helpers below are mapped at a fixed address by + * aarch32_setup_additional_pages() and are provided for compatibility + * reasons with 32 bit (aarch32) applications that need them. * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. 
*/ diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41f4d75bbc14..8074cbd3a3a8 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -74,6 +74,9 @@ static int aarch32_alloc_kuser_vdso_page(void) int kuser_sz = __kuser_helper_end - __kuser_helper_start; unsigned long vdso_page; + if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) + return 0; + vdso_page = get_zeroed_page(GFP_ATOMIC); if (!vdso_page) return -ENOMEM; @@ -112,6 +115,9 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) { void *ret; + if (!IS_ENABLED(CONFIG_KUSER_HELPERS)) + return 0; + /* * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's * not safe to CoW the page containing the CPU exception vectors. -- cgit v1.2.3-59-g8ed1b From dd523791c939cfe642557cb2fac754ac0fed80fe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 23 Apr 2019 14:37:24 +0100 Subject: arm64: Kconfig: Make CONFIG_COMPAT a menuconfig entry Make CONFIG_COMPAT a menuconfig entry so that we can place CONFIG_KUSER_HELPERS and CONFIG_ARMV8_DEPRECATED underneath it. Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 113 +++++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 55 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0861a6f5ee4a..75c1a07abc72 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1082,9 +1082,65 @@ config RODATA_FULL_DEFAULT_ENABLED This requires the linear region to be mapped down to pages, which may adversely affect performance in some cases. +config ARM64_SW_TTBR0_PAN + bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + help + Enabling this option prevents the kernel from accessing + user-space memory directly by pointing TTBR0_EL1 to a reserved + zeroed area and reserved ASID. The user access routines + restore the valid TTBR0_EL1 temporarily. + +menuconfig COMPAT + bool "Kernel support for 32-bit EL0" + depends on ARM64_4K_PAGES || EXPERT + select COMPAT_BINFMT_ELF if BINFMT_ELF + select HAVE_UID16 + select OLD_SIGSUSPEND3 + select COMPAT_OLD_SIGACTION + help + This option enables support for a 32-bit EL0 running under a 64-bit + kernel at EL1. AArch32-specific components such as system calls, + the user helper functions, VFP support and the ptrace interface are + handled appropriately by the kernel. + + If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware + that you will only be able to execute AArch32 binaries that were compiled + with page size aligned segments. + + If you want to execute 32-bit userspace applications, say Y. + +if COMPAT + +config KUSER_HELPERS + bool "Enable kuser helpers page for 32 bit applications" + default y + help + Warning: disabling this option may break 32-bit user programs. + + Provide kuser helpers to compat tasks. The kernel provides + helper code to userspace in read only form at a fixed location + to allow userspace to be independent of the CPU type fitted to + the system. This permits binaries to be run on ARMv4 through + to ARMv8 without modification. + + See Documentation/arm/kernel_user_helpers.txt for details. + + However, the fixed address nature of these helpers can be used + by ROP (return orientated programming) authors when creating + exploits. + + If all of the binaries and libraries which run on your platform + are built specifically for your platform, and make no use of + these helpers, then you can turn this option off to hinder + such exploits. 
However, in that case, if a binary or library + relying on those helpers is run, it will not function correctly. + + Say N here only if you are absolutely certain that you do not + need these helpers; otherwise, the safe option is to say Y. + + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" - depends on COMPAT depends on SYSCTL help Legacy software support may require certain instructions @@ -1150,13 +1206,7 @@ config SETEND_EMULATION If unsure, say Y endif -config ARM64_SW_TTBR0_PAN - bool "Emulate Privileged Access Never using TTBR0_EL1 switching" - help - Enabling this option prevents the kernel from accessing - user-space memory directly by pointing TTBR0_EL1 to a reserved - zeroed area and reserved ASID. The user access routines - restore the valid TTBR0_EL1 temporarily. +endif menu "ARMv8.1 architectural features" @@ -1479,53 +1529,6 @@ config DMI endmenu -config COMPAT - bool "Kernel support for 32-bit EL0" - depends on ARM64_4K_PAGES || EXPERT - select COMPAT_BINFMT_ELF if BINFMT_ELF - select HAVE_UID16 - select OLD_SIGSUSPEND3 - select COMPAT_OLD_SIGACTION - help - This option enables support for a 32-bit EL0 running under a 64-bit - kernel at EL1. AArch32-specific components such as system calls, - the user helper functions, VFP support and the ptrace interface are - handled appropriately by the kernel. - - If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware - that you will only be able to execute AArch32 binaries that were compiled - with page size aligned segments. - - If you want to execute 32-bit userspace applications, say Y. - -config KUSER_HELPERS - bool "Enable kuser helpers page for 32 bit applications." - depends on COMPAT - default y - help - Warning: disabling this option may break 32-bit user programs. - - Provide kuser helpers to compat tasks. The kernel provides - helper code to userspace in read only form at a fixed location - to allow userspace to be independent of the CPU type fitted to - the system. This permits binaries to be run on ARMv4 through - to ARMv8 without modification. - - See Documentation/arm/kernel_user_helpers.txt for details. - - However, the fixed address nature of these helpers can be used - by ROP (return orientated programming) authors when creating - exploits. - - If all of the binaries and libraries which run on your platform - are built specifically for your platform, and make no use of - these helpers, then you can turn this option off to hinder - such exploits. However, in that case, if a binary or library - relying on those helpers is run, it will not function correctly. - - Say N here only if you are absolutely certain that you do not - need these helpers; otherwise, the safe option is to say Y. - config SYSVIPC_COMPAT def_bool y depends on COMPAT && SYSVIPC -- cgit v1.2.3-59-g8ed1b From 06a916feca2b262ab0c1a2aeb68882f4b1108a07 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 18 Apr 2019 18:41:38 +0100 Subject: arm64: Expose SVE2 features for userspace This patch provides support for reporting the presence of SVE2 and its optional features to userspace. This will also enable visibility of SVE2 for guests, when KVM support for SVE-enabled guests is available. 
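As a usage note (not part of this patch), userspace is expected to probe
the new capabilities through the AT_HWCAP2 auxiliary vector entry. A
minimal sketch, assuming a libc that provides getauxval():

    #include <stdio.h>
    #include <sys/auxv.h>

    #ifndef HWCAP2_SVE2
    #define HWCAP2_SVE2 (1 << 1)  /* value added to uapi/asm/hwcap.h above */
    #endif

    int main(void)
    {
        unsigned long hwcap2 = getauxval(AT_HWCAP2);

        printf("SVE2 %s\n",
               (hwcap2 & HWCAP2_SVE2) ? "present" : "not reported");
        return 0;
    }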
Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- Documentation/arm64/cpu-feature-registers.txt | 16 ++++++++++++++++ Documentation/arm64/elf_hwcaps.txt | 24 ++++++++++++++++++++++++ Documentation/arm64/sve.txt | 17 +++++++++++++++++ arch/arm64/Kconfig | 3 +++ arch/arm64/include/asm/hwcap.h | 6 ++++++ arch/arm64/include/asm/sysreg.h | 14 ++++++++++++++ arch/arm64/include/uapi/asm/hwcap.h | 6 ++++++ arch/arm64/kernel/cpufeature.c | 17 ++++++++++++++++- arch/arm64/kernel/cpuinfo.c | 6 ++++++ 9 files changed, 108 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index d4b4dd1fe786..684a0da39378 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -209,6 +209,22 @@ infrastructure: | AT | [35-32] | y | x--------------------------------------------------x + 6) ID_AA64ZFR0_EL1 - SVE feature ID register 0 + + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | SM4 | [43-40] | y | + |--------------------------------------------------| + | SHA3 | [35-32] | y | + |--------------------------------------------------| + | BitPerm | [19-16] | y | + |--------------------------------------------------| + | AES | [7-4] | y | + |--------------------------------------------------| + | SVEVer | [3-0] | y | + x--------------------------------------------------x + Appendix I: Example --------------------------- diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt index 55431fd2a67a..b73a2519ecf2 100644 --- a/Documentation/arm64/elf_hwcaps.txt +++ b/Documentation/arm64/elf_hwcaps.txt @@ -163,6 +163,30 @@ HWCAP_SVE Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001. +HWCAP2_SVE2 + + Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001. + +HWCAP2_SVEAES + + Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001. + +HWCAP2_SVEPMULL + + Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010. + +HWCAP2_SVEBITPERM + + Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001. + +HWCAP2_SVESHA3 + + Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001. + +HWCAP2_SVESM4 + + Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001. + HWCAP_ASIMDFHM Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001. diff --git a/Documentation/arm64/sve.txt b/Documentation/arm64/sve.txt index 7169a0ec41d8..9940e924a47e 100644 --- a/Documentation/arm64/sve.txt +++ b/Documentation/arm64/sve.txt @@ -34,6 +34,23 @@ model features for SVE is included in Appendix A. following sections: software that needs to verify that those interfaces are present must check for HWCAP_SVE instead. +* On hardware that supports the SVE2 extensions, HWCAP2_SVE2 will also + be reported in the AT_HWCAP2 aux vector entry. In addition to this, + optional extensions to SVE2 may be reported by the presence of: + + HWCAP2_SVE2 + HWCAP2_SVEAES + HWCAP2_SVEPMULL + HWCAP2_SVEBITPERM + HWCAP2_SVESHA3 + HWCAP2_SVESM4 + + This list may be extended over time as the SVE architecture evolves. + + These extensions are also reported via the CPU ID register ID_AA64ZFR0_EL1, + which userspace can read using an MRS instruction. See elf_hwcaps.txt and + cpu-feature-registers.txt for details. + * Debuggers should restrict themselves to interacting with the target via the NT_ARM_SVE regset. 
The recommended way of detecting support for this regset is to connect to a target process first and then attempt a diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 75c1a07abc72..4791d4857124 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1372,6 +1372,9 @@ config ARM64_SVE To enable use of this extension on CPUs that implement it, say Y. + On CPUs that support the SVE2 extensions, this option will enable + those too. + Note that for architectural reasons, firmware _must_ implement SVE support when running on SVE capable hardware. The required support is present in: diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index f78c86c64e68..b4bfb6672168 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -89,6 +89,12 @@ #define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32) #define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP) +#define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2) +#define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES) +#define KERNEL_HWCAP_SVEPMULL __khwcap2_feature(SVEPMULL) +#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM) +#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3) +#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5b267dec6194..29c655969320 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -606,6 +606,20 @@ #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 +/* id_aa64zfr0 */ +#define ID_AA64ZFR0_SM4_SHIFT 40 +#define ID_AA64ZFR0_SHA3_SHIFT 32 +#define ID_AA64ZFR0_BITPERM_SHIFT 16 +#define ID_AA64ZFR0_AES_SHIFT 4 +#define ID_AA64ZFR0_SVEVER_SHIFT 0 + +#define ID_AA64ZFR0_SM4 0x1 +#define ID_AA64ZFR0_SHA3 0x1 +#define ID_AA64ZFR0_BITPERM 0x1 +#define ID_AA64ZFR0_AES 0x1 +#define ID_AA64ZFR0_AES_PMULL 0x2 +#define ID_AA64ZFR0_SVEVER_SVE2 0x1 + /* id_aa64mmfr0 */ #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index d64af3913a9e..1a772b162191 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -57,5 +57,11 @@ * HWCAP2 flags - for AT_HWCAP2 */ #define HWCAP2_DCPODP (1 << 0) +#define HWCAP2_SVE2 (1 << 1) +#define HWCAP2_SVEAES (1 << 2) +#define HWCAP2_SVEPMULL (1 << 3) +#define HWCAP2_SVEBITPERM (1 << 4) +#define HWCAP2_SVESHA3 (1 << 5) +#define HWCAP2_SVESM4 (1 << 6) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9d18e45311fd..d856c55445a2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -184,6 +184,15 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, 
FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), @@ -392,7 +401,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1), - ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz), + ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), @@ -1610,6 +1619,12 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_PTR_AUTH diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 093ca53ce1d1..f6f7936be6e7 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -86,6 +86,12 @@ static const char *const hwcap_str[] = { "paca", "pacg", "dcpodp", + "sve2", + "sveaes", + "svepmull", + "svebitperm", + "svesha3", + "svesm4", NULL }; -- cgit v1.2.3-59-g8ed1b From be604c616ca71cbf5c860d0cfa4595128ab74189 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 24 Apr 2019 09:55:37 -0700 Subject: arm64: sysreg: Make mrs_s and msr_s macros work with Clang and LTO Clang's integrated assembler does not allow assembly macros defined in one inline asm block using the .macro directive to be used across separate asm blocks. LLVM developers consider this a feature and not a bug, recommending code refactoring: https://bugs.llvm.org/show_bug.cgi?id=19749 As binutils doesn't allow macros to be redefined, this change uses UNDEFINE_MRS_S and UNDEFINE_MSR_S to define corresponding macros in-place and workaround gcc and clang limitations on redefining macros across different assembler blocks. Specifically, the current state after preprocessing looks like this: asm volatile(".macro mXX_s ... .endm"); void f() { asm volatile("mXX_s a, b"); } With GCC, it gives macro redefinition error because sysreg.h is included in multiple source files, and assembler code for all of them is later combined for LTO (I've seen an intermediate file with hundreds of identical definitions). With clang, it gives macro undefined error because clang doesn't allow sharing macros between inline asm statements. 
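For reference, the define/use/purge pattern adopted below can be checked
in isolation with a toy macro (illustrative only, not a kernel macro):
because the assembler macro never outlives the asm statement that
defines it, neither toolchain sees a redefinition or an undefined
reference.

    static inline unsigned long forty_two(void)
    {
        unsigned long v;

        asm volatile(
        "	.macro	mov_imm, rd, imm\n"
        "	mov	\\rd, #\\imm\n"
        "	.endm\n"
        "	mov_imm	%0, 42\n"
        "	.purgem	mov_imm\n"
        : "=r" (v));

        return v;
    }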
I also seem to remember catching another sort of undefined error with GCC due to reordering of macro definition asm statement and generated asm code for function that uses the macro. The solution with defining and undefining for each use, while certainly not elegant, satisfies both GCC and clang, LTO and non-LTO. Co-developed-by: Alex Matveev Co-developed-by: Yury Norov Co-developed-by: Sami Tolvanen Reviewed-by: Nick Desaulniers Reviewed-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Will Deacon --- arch/arm64/include/asm/irqflags.h | 8 +++---- arch/arm64/include/asm/kvm_hyp.h | 4 ++-- arch/arm64/include/asm/sysreg.h | 45 ++++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 43d8366c1e87..629963189085 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -43,7 +43,7 @@ static inline void arch_local_irq_enable(void) asm volatile(ALTERNATIVE( "msr daifclr, #2 // arch_local_irq_enable\n" "nop", - "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n" + __msr_s(SYS_ICC_PMR_EL1, "%0") "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) : @@ -55,7 +55,7 @@ static inline void arch_local_irq_disable(void) { asm volatile(ALTERNATIVE( "msr daifset, #2 // arch_local_irq_disable", - "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0", + __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : : "r" ((unsigned long) GIC_PRIO_IRQOFF) @@ -86,7 +86,7 @@ static inline unsigned long arch_local_save_flags(void) "mov %0, %1\n" "nop\n" "nop", - "mrs_s %0, " __stringify(SYS_ICC_PMR_EL1) "\n" + __mrs_s("%0", SYS_ICC_PMR_EL1) "ands %1, %1, " __stringify(PSR_I_BIT) "\n" "csel %0, %0, %2, eq", ARM64_HAS_IRQ_PRIO_MASKING) @@ -116,7 +116,7 @@ static inline void arch_local_irq_restore(unsigned long flags) asm volatile(ALTERNATIVE( "msr daif, %0\n" "nop", - "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0\n" + __msr_s(SYS_ICC_PMR_EL1, "%0") "dsb sy", ARM64_HAS_IRQ_PRIO_MASKING) : "+r" (flags) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 4da765f2cca5..c3060833b7a5 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -30,7 +30,7 @@ ({ \ u64 reg; \ asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\ - "mrs_s %0, " __stringify(r##vh),\ + __mrs_s("%0", r##vh), \ ARM64_HAS_VIRT_HOST_EXTN) \ : "=r" (reg)); \ reg; \ @@ -40,7 +40,7 @@ do { \ u64 __val = (u64)(v); \ asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\ - "msr_s " __stringify(r##vh) ", %x0",\ + __msr_s(r##vh, "%x0"), \ ARM64_HAS_VIRT_HOST_EXTN) \ : : "rZ" (__val)); \ } while (0) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 29c655969320..3f7b917e8f3a 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -760,20 +760,39 @@ #include #include -asm( -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" -" .equ .L__reg_num_x\\num, \\num\n" -" .endr\n" +#define __DEFINE_MRS_MSR_S_REGNUM \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__reg_num_x\\num, \\num\n" \ +" .endr\n" \ " .equ .L__reg_num_xzr, 31\n" -"\n" -" .macro mrs_s, rt, sreg\n" - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MRS_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro mrs_s, rt, sreg\n" \ + __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ " .endm\n" -"\n" -" .macro 
msr_s, sreg, rt\n" - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) + +#define DEFINE_MSR_S \ + __DEFINE_MRS_MSR_S_REGNUM \ +" .macro msr_s, sreg, rt\n" \ + __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ " .endm\n" -); + +#define UNDEFINE_MRS_S \ +" .purgem mrs_s\n" + +#define UNDEFINE_MSR_S \ +" .purgem msr_s\n" + +#define __mrs_s(v, r) \ + DEFINE_MRS_S \ +" mrs_s " v ", " __stringify(r) "\n" \ + UNDEFINE_MRS_S + +#define __msr_s(r, v) \ + DEFINE_MSR_S \ +" msr_s " __stringify(r) ", " v "\n" \ + UNDEFINE_MSR_S /* * Unlike read_cpuid, calls to read_sysreg are never expected to be @@ -801,13 +820,13 @@ asm( */ #define read_sysreg_s(r) ({ \ u64 __val; \ - asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \ + asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ __val; \ }) #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ - asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ + asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) /* -- cgit v1.2.3-59-g8ed1b From 84ff7a09c371bc7417eabfda19bf7f113ec917b6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 12:45:09 +0100 Subject: arm64: futex: Fix FUTEX_WAKE_OP atomic ops with non-zero result value Rather embarrassingly, our futex() FUTEX_WAKE_OP implementation doesn't explicitly set the return value on the non-faulting path and instead leaves it holding the result of the underlying atomic operation. This means that any FUTEX_WAKE_OP atomic operation which computes a non-zero value will be reported as having failed. Regrettably, I wrote the buggy code back in 2011 and it was upstreamed as part of the initial arm64 support in 2012. The reasons we appear to get away with this are: 1. FUTEX_WAKE_OP is rarely used and therefore doesn't appear to get exercised by futex() test applications 2. If the result of the atomic operation is zero, the system call behaves correctly 3. Prior to version 2.25, the only operation used by GLIBC set the futex to zero, and therefore worked as expected. From 2.25 onwards, FUTEX_WAKE_OP is not used by GLIBC at all. Fix the implementation by ensuring that the return value is either 0 to indicate that the atomic operation completed successfully, or -EFAULT if we encountered a fault when accessing the user mapping. 
Cc: Fixes: 6170a97460db ("arm64: Atomic operations") Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index cccb83ad7fa8..e1d95f08f8e1 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,8 +30,8 @@ do { \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ -"2: stlxr %w3, %w0, %2\n" \ -" cbnz %w3, 1b\n" \ +"2: stlxr %w0, %w3, %2\n" \ +" cbnz %w0, 1b\n" \ " dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ @@ -50,30 +50,30 @@ do { \ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) { - int oldval = 0, ret, tmp; + int oldval, ret, tmp; u32 __user *uaddr = __uaccess_mask_ptr(_uaddr); pagefault_disable(); switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w0, %w4", + __futex_atomic_op("mov %w3, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w0, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w0, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w0, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w4", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w0, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w4", ret, oldval, uaddr, tmp, oparg); break; default: -- cgit v1.2.3-59-g8ed1b From 6b4f4bc9cb22875f97023984a625386f0c7cc1c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 28 Feb 2019 11:58:08 +0000 Subject: locking/futex: Allow low-level atomic operations to return -EAGAIN Some futex() operations, including FUTEX_WAKE_OP, require the kernel to perform an atomic read-modify-write of the futex word via the userspace mapping. These operations are implemented by each architecture in arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which are called in atomic context with the relevant hash bucket locks held. Although these routines may return -EFAULT in response to a page fault generated when accessing userspace, they are expected to succeed (i.e. return 0) in all other cases. This poses a problem for architectures that do not provide bounded forward progress guarantees or fairness of contended atomic operations and can lead to starvation in some cases. In these problematic scenarios, we must return back to the core futex code so that we can drop the hash bucket locks and reschedule if necessary, much like we do in the case of a page fault. Allow architectures to return -EAGAIN from their implementations of arch_futex_atomic_op_inuser() and futex_atomic_cmpxchg_inatomic(), which will cause the core futex code to reschedule if necessary and return back to the architecture code later on. 
Cc: Acked-by: Peter Zijlstra (Intel) Signed-off-by: Will Deacon --- kernel/futex.c | 188 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 117 insertions(+), 71 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index 9e40cf7be606..6262f1534ac9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1311,13 +1311,15 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) { + int err; u32 uninitialized_var(curval); if (unlikely(should_fail_futex(true))) return -EFAULT; - if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) - return -EFAULT; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (unlikely(err)) + return err; /* If user space value changed, let the caller retry */ return curval != uval ? -EAGAIN : 0; @@ -1502,10 +1504,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_ if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { - ret = -EFAULT; - - } else if (curval != uval) { + ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (!ret && (curval != uval)) { /* * If a unconditional UNLOCK_PI operation (user space did not * try the TID->0 transition) raced with a waiter setting the @@ -1700,32 +1700,32 @@ retry_private: double_lock_hb(hb1, hb2); op_ret = futex_atomic_op_inuser(op, uaddr2); if (unlikely(op_ret < 0)) { - double_unlock_hb(hb1, hb2); -#ifndef CONFIG_MMU - /* - * we don't get EFAULT from MMU faults if we don't have an MMU, - * but we might get them from range checking - */ - ret = op_ret; - goto out_put_keys; -#endif - - if (unlikely(op_ret != -EFAULT)) { + if (!IS_ENABLED(CONFIG_MMU) || + unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { + /* + * we don't get EFAULT from MMU faults if we don't have + * an MMU, but we might get them from range checking + */ ret = op_ret; goto out_put_keys; } - ret = fault_in_user_writeable(uaddr2); - if (ret) - goto out_put_keys; + if (op_ret == -EFAULT) { + ret = fault_in_user_writeable(uaddr2); + if (ret) + goto out_put_keys; + } - if (!(flags & FLAGS_SHARED)) + if (!(flags & FLAGS_SHARED)) { + cond_resched(); goto retry_private; + } put_futex_key(&key2); put_futex_key(&key1); + cond_resched(); goto retry; } @@ -2350,7 +2350,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, u32 uval, uninitialized_var(curval), newval; struct task_struct *oldowner, *newowner; u32 newtid; - int ret; + int ret, err = 0; lockdep_assert_held(q->lock_ptr); @@ -2421,14 +2421,17 @@ retry: if (!pi_state->owner) newtid |= FUTEX_OWNER_DIED; - if (get_futex_value_locked(&uval, uaddr)) - goto handle_fault; + err = get_futex_value_locked(&uval, uaddr); + if (err) + goto handle_err; for (;;) { newval = (uval & FUTEX_OWNER_DIED) | newtid; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - goto handle_fault; + err = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval); + if (err) + goto handle_err; + if (curval == uval) break; uval = curval; @@ -2456,23 +2459,37 @@ retry: return 0; /* - * To handle the page fault we need to drop the locks here. That gives - * the other task (either the highest priority waiter itself or the - * task which stole the rtmutex) the chance to try the fixup of the - * pi_state. So once we are back from handling the fault we need to - * check the pi_state after reacquiring the locks and before trying to - * do another fixup. 
When the fixup has been done already we simply - * return. + * In order to reschedule or handle a page fault, we need to drop the + * locks here. In the case of a fault, this gives the other task + * (either the highest priority waiter itself or the task which stole + * the rtmutex) the chance to try the fixup of the pi_state. So once we + * are back from handling the fault we need to check the pi_state after + * reacquiring the locks and before trying to do another fixup. When + * the fixup has been done already we simply return. * * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely * drop hb->lock since the caller owns the hb -> futex_q relation. * Dropping the pi_mutex->wait_lock requires the state revalidate. */ -handle_fault: +handle_err: raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); spin_unlock(q->lock_ptr); - ret = fault_in_user_writeable(uaddr); + switch (err) { + case -EFAULT: + ret = fault_in_user_writeable(uaddr); + break; + + case -EAGAIN: + cond_resched(); + ret = 0; + break; + + default: + WARN_ON_ONCE(1); + ret = err; + break; + } spin_lock(q->lock_ptr); raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); @@ -3041,10 +3058,8 @@ retry: * A unconditional UNLOCK_PI op raced against a waiter * setting the FUTEX_WAITERS bit. Try again. */ - if (ret == -EAGAIN) { - put_futex_key(&key); - goto retry; - } + if (ret == -EAGAIN) + goto pi_retry; /* * wake_futex_pi has detected invalid state. Tell user * space. @@ -3059,9 +3074,19 @@ retry: * preserve the WAITERS bit not the OWNER_DIED one. We are the * owner. */ - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { + if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) { spin_unlock(&hb->lock); - goto pi_faulted; + switch (ret) { + case -EFAULT: + goto pi_faulted; + + case -EAGAIN: + goto pi_retry; + + default: + WARN_ON_ONCE(1); + goto out_putkey; + } } /* @@ -3075,6 +3100,11 @@ out_putkey: put_futex_key(&key); return ret; +pi_retry: + put_futex_key(&key); + cond_resched(); + goto retry; + pi_faulted: put_futex_key(&key); @@ -3435,6 +3465,7 @@ err_unlock: static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) { u32 uval, uninitialized_var(nval), mval; + int err; /* Futex address must be 32bit aligned */ if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) @@ -3444,42 +3475,57 @@ retry: if (get_user(uval, uaddr)) return -1; - if ((uval & FUTEX_TID_MASK) == task_pid_vnr(curr)) { - /* - * Ok, this dying thread is truly holding a futex - * of interest. Set the OWNER_DIED bit atomically - * via cmpxchg, and if the value had FUTEX_WAITERS - * set, wake up a waiter (if any). (We have to do a - * futex_wake() even if OWNER_DIED is already set - - * to handle the rare but possible case of recursive - * thread-death.) The rest of the cleanup is done in - * userspace. - */ - mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - /* - * We are not holding a lock here, but we want to have - * the pagefault_disable/enable() protection because - * we want to handle the fault gracefully. If the - * access fails we try to fault in the futex with R/W - * verification via get_user_pages. get_user() above - * does not guarantee R/W access. If that fails we - * give up and leave the futex locked. - */ - if (cmpxchg_futex_value_locked(&nval, uaddr, uval, mval)) { + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) + return 0; + + /* + * Ok, this dying thread is truly holding a futex + * of interest. 
Set the OWNER_DIED bit atomically + * via cmpxchg, and if the value had FUTEX_WAITERS + * set, wake up a waiter (if any). (We have to do a + * futex_wake() even if OWNER_DIED is already set - + * to handle the rare but possible case of recursive + * thread-death.) The rest of the cleanup is done in + * userspace. + */ + mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; + + /* + * We are not holding a lock here, but we want to have + * the pagefault_disable/enable() protection because + * we want to handle the fault gracefully. If the + * access fails we try to fault in the futex with R/W + * verification via get_user_pages. get_user() above + * does not guarantee R/W access. If that fails we + * give up and leave the futex locked. + */ + if ((err = cmpxchg_futex_value_locked(&nval, uaddr, uval, mval))) { + switch (err) { + case -EFAULT: if (fault_in_user_writeable(uaddr)) return -1; goto retry; - } - if (nval != uval) + + case -EAGAIN: + cond_resched(); goto retry; - /* - * Wake robust non-PI futexes here. The wakeup of - * PI futexes happens in exit_pi_state(): - */ - if (!pi && (uval & FUTEX_WAITERS)) - futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + default: + WARN_ON_ONCE(1); + return err; + } } + + if (nval != uval) + goto retry; + + /* + * Wake robust non-PI futexes here. The wakeup of + * PI futexes happens in exit_pi_state(): + */ + if (!pi && (uval & FUTEX_WAITERS)) + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + return 0; } -- cgit v1.2.3-59-g8ed1b From 03110a5cb2161690ae5ac04994d47ed0cd6cef75 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 8 Apr 2019 14:23:17 +0100 Subject: arm64: futex: Bound number of LDXR/STXR loops in FUTEX_WAKE_OP Our futex implementation makes use of LDXR/STXR loops to perform atomic updates to user memory from atomic context. This can lead to latency problems if we end up spinning around the LL/SC sequence at the expense of doing something useful. Rework our futex atomic operations so that we return -EAGAIN if we fail to update the futex word after 128 attempts. The core futex code will reschedule if necessary and we'll try again later. Cc: Fixes: 6170a97460db ("arm64: Atomic operations") Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 55 ++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index e1d95f08f8e1..2d78ea6932b7 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -23,26 +23,34 @@ #include +#define FUTEX_MAX_LOOPS 128 /* What's the largest number you can think of? 
*/ + #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ do { \ + unsigned int loops = FUTEX_MAX_LOOPS; \ + \ uaccess_enable(); \ asm volatile( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w0, %w3, %2\n" \ -" cbnz %w0, 1b\n" \ -" dmb ish\n" \ +" cbz %w0, 3f\n" \ +" sub %w4, %w4, %w0\n" \ +" cbnz %w4, 1b\n" \ +" mov %w0, %w7\n" \ "3:\n" \ +" dmb ish\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ -"4: mov %w0, %w5\n" \ +"4: mov %w0, %w6\n" \ " b 3b\n" \ " .popsection\n" \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ - : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ - : "r" (oparg), "Ir" (-EFAULT) \ + : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ + "+r" (loops) \ + : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable(); \ } while (0) @@ -57,23 +65,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %w3, %w4", + __futex_atomic_op("mov %w3, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %w3, %w1, %w4", + __futex_atomic_op("add %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("orr %w3, %w1, %w4", + __futex_atomic_op("orr %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %w3, %w1, %w4", + __futex_atomic_op("and %w3, %w1, %w5", ret, oldval, uaddr, tmp, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("eor %w3, %w1, %w4", + __futex_atomic_op("eor %w3, %w1, %w5", ret, oldval, uaddr, tmp, oparg); break; default: @@ -93,6 +101,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, u32 oldval, u32 newval) { int ret = 0; + unsigned int loops = FUTEX_MAX_LOOPS; u32 val, tmp; u32 __user *uaddr; @@ -104,20 +113,24 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, asm volatile("// futex_atomic_cmpxchg_inatomic\n" " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" -" sub %w3, %w1, %w4\n" -" cbnz %w3, 3f\n" -"2: stlxr %w3, %w5, %2\n" -" cbnz %w3, 1b\n" -" dmb ish\n" +" sub %w3, %w1, %w5\n" +" cbnz %w3, 4f\n" +"2: stlxr %w3, %w6, %2\n" +" cbz %w3, 3f\n" +" sub %w4, %w4, %w3\n" +" cbnz %w4, 1b\n" +" mov %w0, %w8\n" "3:\n" +" dmb ish\n" +"4:\n" " .pushsection .fixup,\"ax\"\n" -"4: mov %w0, %w6\n" -" b 3b\n" +"5: mov %w0, %w7\n" +" b 4b\n" " .popsection\n" - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) - : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT) + _ASM_EXTABLE(1b, 5b) + _ASM_EXTABLE(2b, 5b) + : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) + : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) : "memory"); uaccess_disable(); -- cgit v1.2.3-59-g8ed1b From 8e4e0ac02b449297b86498ac24db5786ddd9f647 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 10 Apr 2019 11:49:11 +0100 Subject: arm64: futex: Avoid copying out uninitialised stack in failed cmpxchg() Returning an error code from futex_atomic_cmpxchg_inatomic() indicates that the caller should not make any use of *uval, and should instead act upon on the value of the error code. Although this is implemented correctly in our futex code, we needlessly copy uninitialised stack to *uval in the error case, which can easily be avoided. 
Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 2d78ea6932b7..bdb3c05070a2 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -134,7 +134,9 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, : "memory"); uaccess_disable(); - *uval = val; + if (!ret) + *uval = val; + return ret; } -- cgit v1.2.3-59-g8ed1b From 427503519739e779c0db8afe876c1b33f3ac60ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 10 Apr 2019 11:51:54 +0100 Subject: futex: Update comments and docs about return values of arch futex code The architecture implementations of 'arch_futex_atomic_op_inuser()' and 'futex_atomic_cmpxchg_inatomic()' are permitted to return only -EFAULT, -EAGAIN or -ENOSYS in the case of failure. Update the comments in the asm-generic/ implementation and also a stray reference in the robust futex documentation. Signed-off-by: Will Deacon --- Documentation/robust-futexes.txt | 3 +-- include/asm-generic/futex.h | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt index 6c42c75103eb..6361fb01c9c1 100644 --- a/Documentation/robust-futexes.txt +++ b/Documentation/robust-futexes.txt @@ -218,5 +218,4 @@ All other architectures should build just fine too - but they won't have the new syscalls yet. Architectures need to implement the new futex_atomic_cmpxchg_inatomic() -inline function before writing up the syscalls (that function returns --ENOSYS right now). +inline function before writing up the syscalls. diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index fcb61b4659b3..8666fe7f35d7 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -23,7 +23,9 @@ * * Return: * 0 - On success - * <0 - On error + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Operation not supported */ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) @@ -85,7 +87,9 @@ out_pagefault_enable: * * Return: * 0 - On success - * <0 - On error + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG) */ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, -- cgit v1.2.3-59-g8ed1b From e5ce5e7267ddcbe13ab9ead2542524e1b7993e5a Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:20 -0500 Subject: arm64: Provide a command line to disable spectre_v2 mitigation There are various reasons, such as benchmarking, to disable spectrev2 mitigation on a machine. Provide a command-line option to do so. 
Signed-off-by: Jeremy Linton Reviewed-by: Suzuki K Poulose Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++---- arch/arm64/kernel/cpu_errata.c | 13 +++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cf82bac648cd..6a929258faf7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2905,10 +2905,10 @@ check bypass). With this option data leaks are possible in the system. - nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 - (indirect branch prediction) vulnerability. System may - allow data leaks with this option, which is equivalent - to spectre_v2=off. + nospectre_v2 [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for + the Spectre variant 2 (indirect branch prediction) + vulnerability. System may allow data leaks with this + option. nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9950bb0cbd52..d2b2c69d31bb 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -220,6 +220,14 @@ static void qcom_link_stack_sanitization(void) : "=&r" (tmp)); } +static bool __nospectre_v2; +static int __init parse_nospectre_v2(char *str) +{ + __nospectre_v2 = true; + return 0; +} +early_param("nospectre_v2", parse_nospectre_v2); + static void enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { @@ -231,6 +239,11 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) if (!entry->matches(entry, SCOPE_LOCAL_CPU)) return; + if (__nospectre_v2) { + pr_info_once("spectrev2 mitigation disabled by command line option\n"); + return; + } + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) return; -- cgit v1.2.3-59-g8ed1b From 3891ebccace188af075ce143d8b072b65e90f695 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Mon, 15 Apr 2019 16:21:21 -0500 Subject: arm64: Add sysfs vulnerability show for spectre-v1 spectre-v1 has been mitigated and the mitigation is always active. Report this to userspace via sysfs Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Acked-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d2b2c69d31bb..cf623657cf3c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -755,3 +755,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { } }; + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} -- cgit v1.2.3-59-g8ed1b From 1b3ccf4be0e7be8c4bd8522066b6cbc92591e912 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:22 -0500 Subject: arm64: add sysfs vulnerability show for meltdown We implement page table isolation as a mitigation for meltdown. Report this to userspace via sysfs. 
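For completeness, a small sketch (not part of the patch) of how
userspace reads the resulting entry; the path follows the generic CPU
vulnerabilities ABI and the string comes from the cpu_show_spectre_v1()
helper added below:

    #include <stdio.h>

    int main(void)
    {
        char line[128];
        FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v1", "r");

        if (!f)
            return 1;
        if (fgets(line, sizeof(line), f))
            fputs(line, stdout);
        fclose(f);
        return 0;
    }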
Signed-off-by: Jeremy Linton Reviewed-by: Suzuki K Poulose Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 58 ++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4061de10cea6..703ee8564fbd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -947,7 +947,7 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) return has_cpuid_feature(entry, scope); } -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static bool __meltdown_safe = true; static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, @@ -967,6 +967,16 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, { /* sentinel */ } }; char const *str = "command line option"; + bool meltdown_safe; + + meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); + + /* Defer to CPU feature registers */ + if (has_cpuid_feature(entry, scope)) + meltdown_safe = true; + + if (!meltdown_safe) + __meltdown_safe = false; /* * For reasons that aren't entirely clear, enabling KPTI on Cavium @@ -978,6 +988,19 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, __kpti_forced = -1; } + /* Useful for KASLR robustness */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) { + if (!__kpti_forced) { + str = "KASLR"; + __kpti_forced = 1; + } + } + + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + pr_info_once("kernel page table isolation disabled by kernel configuration\n"); + return false; + } + /* Forced? 
*/ if (__kpti_forced) { pr_info_once("kernel page table isolation forced %s by %s\n", @@ -985,18 +1008,10 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return __kpti_forced > 0; } - /* Useful for KASLR robustness */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return kaslr_offset() > 0; - - /* Don't force KPTI for CPUs that are not vulnerable */ - if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list)) - return false; - - /* Defer to CPU feature registers */ - return !has_cpuid_feature(entry, scope); + return !meltdown_safe; } +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static void kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { @@ -1026,6 +1041,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) return; } +#else +static void +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) +{ +} +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ static int __init parse_kpti(char *str) { @@ -1039,7 +1060,6 @@ static int __init parse_kpti(char *str) return 0; } early_param("kpti", parse_kpti); -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ #ifdef CONFIG_ARM64_HW_AFDBM static inline void __cpu_enable_hw_dbm(void) @@ -1306,7 +1326,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, @@ -1322,7 +1341,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, }, -#endif { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, @@ -2101,3 +2119,15 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__meltdown_safe) + return sprintf(buf, "Not affected\n"); + + if (arm64_kernel_unmapped_at_el0()) + return sprintf(buf, "Mitigation: PTI\n"); + + return sprintf(buf, "Vulnerable\n"); +} -- cgit v1.2.3-59-g8ed1b From 73f38166095947f3b86b02fbed6bd592223a7ac8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 16:21:23 -0500 Subject: arm64: Advertise mitigation of Spectre-v2, or lack thereof We currently have a list of CPUs affected by Spectre-v2, for which we check that the firmware implements ARCH_WORKAROUND_1. It turns out that not all firmwares do implement the required mitigation, and that we fail to let the user know about it. Instead, let's slightly revamp our checks, and rely on a whitelist of cores that are known to be non-vulnerable, and let the user know the status of the mitigation in the kernel log. 
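In practice this means an affected machine whose firmware does not implement ARCH_WORKAROUND_1 now gets an explicit warning in the kernel log, along the lines of:

    ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware

instead of silently running without the mitigation.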
Signed-off-by: Marc Zyngier Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 109 +++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cf623657cf3c..032f1a4dbea2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -131,9 +131,9 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) +static void install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) { static DEFINE_RAW_SPINLOCK(bp_lock); int cpu, slot = -1; @@ -169,7 +169,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, +static void install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { @@ -177,23 +177,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, - bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - u64 pfr0; - - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return; - - pfr0 = read_cpuid(ID_AA64PFR0_EL1); - if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) - return; - - __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); -} - #include #include #include @@ -228,31 +211,27 @@ static int __init parse_nospectre_v2(char *str) } early_param("nospectre_v2", parse_nospectre_v2); -static void -enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) +/* + * -1: No workaround + * 0: No workaround required + * 1: Workaround installed + */ +static int detect_harden_bp_fw(void) { bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return; - - if (__nospectre_v2) { - pr_info_once("spectrev2 mitigation disabled by command line option\n"); - return; - } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return; + return -1; switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return; + return -1; cb = call_hvc_arch_workaround_1; /* This is a guest, no need to patch KVM vectors */ smccc_start = NULL; @@ -263,23 +242,23 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 < 0) - return; + return -1; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; smccc_end = __smccc_workaround_1_smc_end; break; default: - return; + return -1; } if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) cb = qcom_link_stack_sanitization; - install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); + install_bp_hardening_cb(cb, smccc_start, smccc_end); - return; + return 1; } 
#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ @@ -521,24 +500,48 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) CAP_MIDR_RANGE_LIST(midr_list) #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR - /* - * List of CPUs where we need to issue a psci call to - * harden the branch predictor. + * List of CPUs that do not need any Spectre-v2 mitigation at all. */ -static const struct midr_range arm64_bp_harden_smccc_cpus[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER), - {}, +static const struct midr_range spectre_v2_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + { /* sentinel */ } }; +static bool __maybe_unused +check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) +{ + int need_wa; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + /* If the CPU has CSV2 set, we're safe */ + if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1), + ID_AA64PFR0_CSV2_SHIFT)) + return false; + + /* Alternatively, we have a list of unaffected CPUs */ + if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) + return false; + + /* Fallback to firmware detection */ + need_wa = detect_harden_bp_fw(); + if (!need_wa) + return false; + + /* forced off */ + if (__nospectre_v2) { + pr_info_once("spectrev2 mitigation disabled by command line option\n"); + return false; + } + + if (need_wa < 0) + pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); + + return (need_wa > 0); +} #endif #ifdef CONFIG_HARDEN_EL2_VECTORS @@ -717,8 +720,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - .cpu_enable = enable_smccc_arch_workaround_1, - ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = check_branch_predictor, }, #endif #ifdef CONFIG_HARDEN_EL2_VECTORS -- cgit v1.2.3-59-g8ed1b From 517953c2c47f9c00a002f588ac856a5bc70cede3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 16:21:24 -0500 Subject: arm64: Use firmware to detect CPUs that are not affected by Spectre-v2 The SMCCC ARCH_WORKAROUND_1 service can indicate that although the firmware knows about the Spectre-v2 mitigation, this particular CPU is not vulnerable, and it is thus not necessary to call the firmware on this CPU. Let's use this information to our benefit. 
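Concretely, the per-conduit query of ARM_SMCCC_ARCH_WORKAROUND_1 now distinguishes three outcomes, in outline (conceptual sketch of the switch below, not the literal diff):

    switch ((int)res.a0) {
    case 1:        /* firmware knows the workaround, this CPU doesn't need it */
        return 0;
    case 0:        /* workaround implemented and required: install the callback */
        break;
    default:       /* negative: no firmware support at all */
        return -1;
    }

The same convention is used for both the HVC and SMC conduits.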
Signed-off-by: Marc Zyngier Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 032f1a4dbea2..60cf87c4deb7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -230,22 +230,36 @@ static int detect_harden_bp_fw(void) case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; + break; + default: return -1; - cb = call_hvc_arch_workaround_1; - /* This is a guest, no need to patch KVM vectors */ - smccc_start = NULL; - smccc_end = NULL; + } break; case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; + break; + default: return -1; - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; + } break; default: -- cgit v1.2.3-59-g8ed1b From 8c1e3d2bb44cbb998cb28ff9a18f105fee7f1eb3 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:25 -0500 Subject: arm64: Always enable spectre-v2 vulnerability detection Ensure we are always able to detect whether or not the CPU is affected by Spectre-v2, so that we can later advertise this to userspace. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 60cf87c4deb7..a9c3ad4f7948 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -109,7 +109,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include #include @@ -270,11 +269,11 @@ static int detect_harden_bp_fw(void) ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) cb = qcom_link_stack_sanitization; - install_bp_hardening_cb(cb, smccc_start, smccc_end); + if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) + install_bp_hardening_cb(cb, smccc_start, smccc_end); return 1; } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); @@ -513,7 +512,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR /* * List of CPUs that do not need any Spectre-v2 mitigation at all. 
*/ @@ -545,6 +543,12 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) if (!need_wa) return false; + if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { + pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); + __hardenbp_enab = false; + return false; + } + /* forced off */ if (__nospectre_v2) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); @@ -556,7 +560,6 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } -#endif #ifdef CONFIG_HARDEN_EL2_VECTORS @@ -731,13 +734,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, #endif -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = check_branch_predictor, }, -#endif #ifdef CONFIG_HARDEN_EL2_VECTORS { .desc = "EL2 vector hardening", -- cgit v1.2.3-59-g8ed1b From d2532e27b5638bb2e2dd52b80b7ea2ec65135377 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:26 -0500 Subject: arm64: add sysfs vulnerability show for spectre-v2 Track whether all the cores in the machine are vulnerable to Spectre-v2, and whether all the vulnerable cores have been mitigated. We then expose this information to userspace via sysfs. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a9c3ad4f7948..d2bbafa04b3c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -512,6 +512,10 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ CAP_MIDR_RANGE_LIST(midr_list) +/* Track overall mitigation state. We are only mitigated if all cores are ok */ +static bool __hardenbp_enab = true; +static bool __spectrev2_safe = true; + /* * List of CPUs that do not need any Spectre-v2 mitigation at all. */ @@ -522,6 +526,10 @@ static const struct midr_range spectre_v2_safe_list[] = { { /* sentinel */ } }; +/* + * Track overall bp hardening for all heterogeneous cores in the machine. + * We are only considered "safe" if all booted cores are known safe. 
+ */ static bool __maybe_unused check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) { @@ -543,6 +551,8 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) if (!need_wa) return false; + __spectrev2_safe = false; + if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); __hardenbp_enab = false; @@ -552,11 +562,14 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) /* forced off */ if (__nospectre_v2) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); + __hardenbp_enab = false; return false; } - if (need_wa < 0) + if (need_wa < 0) { pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); + __hardenbp_enab = false; + } return (need_wa > 0); } @@ -779,3 +792,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, { return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__spectrev2_safe) + return sprintf(buf, "Not affected\n"); + + if (__hardenbp_enab) + return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + + return sprintf(buf, "Vulnerable\n"); +} -- cgit v1.2.3-59-g8ed1b From d42281b6e49510f078ace15a8ea10f71e6262581 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:27 -0500 Subject: arm64: Always enable ssb vulnerability detection Ensure we are always able to detect whether or not the CPU is affected by SSB, so that we can later advertise this to userspace. Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren [will: Use IS_ENABLED instead of #ifdef] Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 4 ---- arch/arm64/kernel/cpu_errata.c | 9 +++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index e505e1fbd2b9..6ccdc97e5d6a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -638,11 +638,7 @@ static inline int arm64_get_ssbd_state(void) #endif } -#ifdef CONFIG_ARM64_SSBD void arm64_set_ssbd_mitigation(bool state); -#else -static inline void arm64_set_ssbd_mitigation(bool state) {} -#endif extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index d2bbafa04b3c..b6132783e8cf 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -275,7 +275,6 @@ static int detect_harden_bp_fw(void) return 1; } -#ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; @@ -348,6 +347,11 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { + pr_info_once("SSBD disabled by kernel configuration\n"); + return; + } + if (this_cpu_has_cap(ARM64_SSBS)) { if (state) asm volatile(SET_PSTATE_SSBS(0)); @@ -467,7 +471,6 @@ out_printmsg: return required; } -#endif /* CONFIG_ARM64_SSBD */ static void __maybe_unused cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) @@ -759,14 +762,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, #endif -#ifdef CONFIG_ARM64_SSBD { .desc = 
"Speculative Store Bypass Disable", .capability = ARM64_SSBD, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, }, -#endif #ifdef CONFIG_ARM64_ERRATUM_1188873 { /* Cortex-A76 r0p0 to r2p0 */ -- cgit v1.2.3-59-g8ed1b From bc15cf701fa4875d9710f16ca4d2ce25e66ce4a0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Apr 2019 14:21:11 +0100 Subject: arm64: Kconfig: Tidy up errata workaround help text The nature of silicon errata means that the Kconfig help text for our various software workarounds has been written by many different people. Along the way, we've accumulated typos and inconsistencies which make the options needlessly difficult to read. Fix up minor issues with the help text. Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4791d4857124..d28f12e9c160 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -298,7 +298,7 @@ menu "Kernel Features" menu "ARM errata workarounds via the alternatives framework" config ARM64_WORKAROUND_CLEAN_CACHE - def_bool n + bool config ARM64_ERRATUM_826319 bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted" @@ -465,13 +465,13 @@ config ARM64_ERRATUM_1024718 bool "Cortex-A55: 1024718: Update of DBM/AP bits without break before make might result in incorrect update" default y help - This option adds work around for Arm Cortex-A55 Erratum 1024718. + This option adds a workaround for ARM Cortex-A55 Erratum 1024718. Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect update of the hardware dirty bit when the DBM/AP bits are updated - without a break-before-make. The work around is to disable the usage + without a break-before-make. The workaround is to disable the usage of hardware DBM locally on the affected cores. CPUs not affected by - erratum will continue to use the feature. + this erratum will continue to use the feature. If unsure, say Y. @@ -480,7 +480,7 @@ config ARM64_ERRATUM_1188873 default y select ARM_ARCH_TIMER_OOL_WORKAROUND help - This option adds work arounds for ARM Cortex-A76 erratum 1188873 + This option adds a workaround for ARM Cortex-A76 erratum 1188873. Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause register corruption when accessing the timer registers from @@ -492,7 +492,7 @@ config ARM64_ERRATUM_1165522 bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y help - This option adds work arounds for ARM Cortex-A76 erratum 1165522 + This option adds a workaround for ARM Cortex-A76 erratum 1165522. Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could end-up with corrupted TLBs by speculating an AT instruction during a guest @@ -505,7 +505,7 @@ config ARM64_ERRATUM_1286807 default y select ARM64_WORKAROUND_REPEAT_TLBI help - This option adds workaround for ARM Cortex-A76 erratum 1286807 + This option adds a workaround for ARM Cortex-A76 erratum 1286807. On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual address for a cacheable mapping of a location is being @@ -522,10 +522,10 @@ config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y help - Enable workaround for erratum 22375, 24313. + Enable workaround for errata 22375 and 24313. This implements two gicv3-its errata workarounds for ThunderX. Both - with small impact affecting only ITS table allocation. 
+ with a small impact affecting only ITS table allocation. erratum 22375: only alloc 8MB table size erratum 24313: ignore memory access type @@ -589,9 +589,6 @@ config QCOM_FALKOR_ERRATUM_1003 config ARM64_WORKAROUND_REPEAT_TLBI bool - help - Enable the repeat TLBI workaround for Falkor erratum 1009 and - Cortex-A76 erratum 1286807. config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" @@ -627,7 +624,7 @@ config HISILICON_ERRATUM_161600802 bool "Hip07 161600802: Erroneous redistributor VLPI base" default y help - The HiSilicon Hip07 SoC usees the wrong redistributor base + The HiSilicon Hip07 SoC uses the wrong redistributor base when issued ITS commands such as VMOVP and VMAPP, and requires a 128kB offset to be applied to the target address in this commands. @@ -647,7 +644,7 @@ config FUJITSU_ERRATUM_010001 bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly" default y help - This option adds workaround for Fujitsu-A64FX erratum E#010001. + This option adds a workaround for Fujitsu-A64FX erratum E#010001. On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory accesses may cause undefined fault (Data abort, DFSC=0b111111). This fault occurs under a specific hardware condition when a @@ -658,7 +655,7 @@ config FUJITSU_ERRATUM_010001 case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1. The workaround is to ensure these bits are clear in TCR_ELx. - The workaround only affect the Fujitsu-A64FX. + The workaround only affects the Fujitsu-A64FX. If unsure, say Y. @@ -1408,7 +1405,7 @@ config ARM64_PSEUDO_NMI help Adds support for mimicking Non-Maskable Interrupts through the use of GIC interrupt priority. This support requires version 3 or later of - Arm GIC. + ARM GIC. This high priority configuration for interrupts needs to be explicitly enabled by setting the kernel parameter -- cgit v1.2.3-59-g8ed1b From f08cae2f28db24d95be5204046b60618d8de4ddc Mon Sep 17 00:00:00 2001 From: Boyang Zhou Date: Mon, 29 Apr 2019 15:27:19 +0100 Subject: arm64: mmap: Ensure file offset is treated as unsigned The file offset argument to the arm64 sys_mmap() implementation is scaled from bytes to pages by shifting right by PAGE_SHIFT. Unfortunately, the offset is passed in as a signed 'off_t' type and therefore large offsets (i.e. with the top bit set) are incorrectly sign-extended by the shift. This has been observed to cause false mmap() failures when mapping GPU doorbells on an arm64 server part. Change the type of the file offset argument to sys_mmap() from 'off_t' to 'unsigned long' so that the shifting scales the value as expected. Cc: Signed-off-by: Boyang Zhou [will: rewrote commit message] Signed-off-by: Will Deacon --- arch/arm64/kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index b44065fb1616..6f91e8116514 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -31,7 +31,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, - unsigned long, fd, off_t, off) + unsigned long, fd, unsigned long, off) { if (offset_in_page(off) != 0) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From 2f1d4e24d91b1f475f939685e19bb1e537031661 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 26 Apr 2019 10:16:36 +0800 Subject: firmware: arm_sdei: Prohibit probing in '_sdei_handler' Functions called in '_sdei_handler' are needed to be marked as 'nokprobe'. 
Because these functions are called in NMI context and neither the arch-code's debug infrastructure nor kprobes core supports this. Signed-off-by: Xiongfeng Wang Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/firmware/arm_sdei.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index e6376f985ef7..9cd70d1a5622 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -165,6 +165,7 @@ static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0, return err; } +NOKPROBE_SYMBOL(invoke_sdei_fn); static struct sdei_event *sdei_event_find(u32 event_num) { @@ -879,6 +880,7 @@ static void sdei_smccc_smc(unsigned long function_id, { arm_smccc_smc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res); } +NOKPROBE_SYMBOL(sdei_smccc_smc); static void sdei_smccc_hvc(unsigned long function_id, unsigned long arg0, unsigned long arg1, @@ -887,6 +889,7 @@ static void sdei_smccc_hvc(unsigned long function_id, { arm_smccc_hvc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res); } +NOKPROBE_SYMBOL(sdei_smccc_hvc); int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, sdei_event_callback *critical_cb) -- cgit v1.2.3-59-g8ed1b From 75a19a0202db21638a1c2b424afb867e1f9a2376 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 29 Apr 2019 17:26:22 +0100 Subject: arm64: arch_timer: Ensure counter register reads occur with seqlock held When executing clock_gettime(), either in the vDSO or via a system call, we need to ensure that the read of the counter register occurs within the seqlock reader critical section. This ensures that updates to the clocksource parameters (e.g. the multiplier) are consistent with the counter value and therefore avoids the situation where time appears to go backwards across multiple reads. Extend the vDSO logic so that the seqlock critical section covers the read of the counter register as well as accesses to the data page. Since reads of the counter system registers are not ordered by memory barrier instructions, introduce dependency ordering from the counter read to a subsequent memory access so that the seqlock memory barriers apply to the counter access in both the vDSO and the system call paths. Cc: Cc: Marc Zyngier Tested-by: Vincenzo Frascino Link: https://lore.kernel.org/linux-arm-kernel/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ Reported-by: Thomas Gleixner Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 33 +++++++++++++++++++++++++++++++-- arch/arm64/kernel/vdso/gettimeofday.S | 15 +++++++++++---- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f2a234d6516c..93e07512b4b6 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -148,18 +148,47 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } +/* + * Ensure that reads of the counter are treated the same as memory reads + * for the purposes of ordering by subsequent memory barriers. + * + * This insanity brought to you by speculative system register reads, + * out-of-order memory accesses, sequence locks and Thomas Gleixner. 
+ * + * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + */ +#define arch_counter_enforce_ordering(val) do { \ + u64 tmp, _val = (val); \ + \ + asm volatile( \ + " eor %0, %1, %1\n" \ + " add %0, sp, %0\n" \ + " ldr xzr, [%0]" \ + : "=r" (tmp) : "r" (_val)); \ +} while (0) + static inline u64 arch_counter_get_cntpct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntpct_el0); + cnt = arch_timer_reg_read_stable(cntpct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } static inline u64 arch_counter_get_cntvct(void) { + u64 cnt; + isb(); - return arch_timer_reg_read_stable(cntvct_el0); + cnt = arch_timer_reg_read_stable(cntvct_el0); + arch_counter_enforce_ordering(cnt); + return cnt; } +#undef arch_counter_enforce_ordering + static inline int arch_timer_arch_init(void) { return 0; diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index 21805e416483..856fee6d3512 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -73,6 +73,13 @@ x_tmp .req x8 movn x_tmp, #0xff00, lsl #48 and \res, x_tmp, \res mul \res, \res, \mult + /* + * Fake address dependency from the value computed from the counter + * register to subsequent data page accesses so that the sequence + * locking also orders the read of the counter. + */ + and x_tmp, \res, xzr + add vdso_data, vdso_data, x_tmp .endm /* @@ -147,12 +154,12 @@ ENTRY(__kernel_gettimeofday) /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=1b get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=1b get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -211,13 +218,13 @@ realtime: /* w11 = cs_mono_mult, w12 = cs_shift */ ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - seqcnt_check fail=realtime /* All computations are done with left-shifted nsecs. */ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=realtime get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 clock_gettime_return, shift=1 @@ -231,7 +238,6 @@ monotonic: ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic /* All computations are done with left-shifted nsecs. */ lsl x4, x4, x12 @@ -239,6 +245,7 @@ monotonic: lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic get_ts_realtime res_sec=x10, res_nsec=x11, \ clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 @@ -253,13 +260,13 @@ monotonic_raw: /* w11 = cs_raw_mult, w12 = cs_shift */ ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. 
*/ get_nsec_per_sec res=x9 lsl x9, x9, x12 get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 + seqcnt_check fail=monotonic_raw get_ts_clock_raw res_sec=x10, res_nsec=x11, \ clock_nsec=x15, nsec_to_sec=x9 -- cgit v1.2.3-59-g8ed1b From 359db57c34af15edf35492944af4d4417f59886c Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Mon, 29 Apr 2019 18:27:13 +0100 Subject: arm64: compat: Reduce address limit for 64K pages With the introduction of the config option that allows to enable kuser helpers, it is now possible to reduce TASK_SIZE_32 when these are disabled and 64K pages are enabled. This extends the compliance with the section 6.5.8 of the C standard (C99). Acked-by: Catalin Marinas Reported-by: Catalin Marinas Signed-off-by: Vincenzo Frascino Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 228af56ece48..fcd0e691b1ea 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -57,7 +57,7 @@ #define TASK_SIZE_64 (UL(1) << vabits_user) #ifdef CONFIG_COMPAT -#ifdef CONFIG_ARM64_64K_PAGES +#if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS) /* * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied * by the compat vectors page. -- cgit v1.2.3-59-g8ed1b From 74dd022f9e6260c3b5b8d15901d27ebcc5f21eda Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 29 Apr 2019 13:37:01 -0400 Subject: arm64: Fix compiler warning from pte_unmap() with -Wunused-but-set-variable When building with -Wunused-but-set-variable, the compiler shouts about a number of pte_unmap() users, since this expands to an empty macro on arm64: | mm/gup.c: In function 'gup_pte_range': | mm/gup.c:1727:16: warning: variable 'ptem' set but not used | [-Wunused-but-set-variable] | mm/gup.c: At top level: | mm/memory.c: In function 'copy_pte_range': | mm/memory.c:821:24: warning: variable 'orig_dst_pte' set but not used | [-Wunused-but-set-variable] | mm/memory.c:821:9: warning: variable 'orig_src_pte' set but not used | [-Wunused-but-set-variable] | mm/swap_state.c: In function 'swap_ra_info': | mm/swap_state.c:641:15: warning: variable 'orig_pte' set but not used | [-Wunused-but-set-variable] | mm/madvise.c: In function 'madvise_free_pte_range': | mm/madvise.c:318:9: warning: variable 'orig_pte' set but not used | [-Wunused-but-set-variable] Rewrite pte_unmap() as a static inline function, which silences the warnings. Signed-off-by: Qian Cai Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index de70c1eabf33..74ebe9693714 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -478,6 +478,8 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) return __pmd_to_phys(pmd); } +static inline void pte_unmap(pte_t *pte) { } + /* Find an entry in the third-level page table. 
*/ #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) @@ -486,7 +488,6 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while (0) #define pte_unmap_nested(pte) do { } while (0) #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) -- cgit v1.2.3-59-g8ed1b From 5fbbeedb9a8f039e0a531435c7894905c1d51e77 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Mon, 29 Apr 2019 13:37:02 -0400 Subject: arm64: mm: Remove pte_unmap_nested() As of commit ece0e2b6406a ("mm: remove pte_*map_nested()"), pte_unmap_nested() is no longer used and can be removed from the arm64 code. Signed-off-by: Qian Cai [will: also remove pte_offset_map_nested()] Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 74ebe9693714..2c41b04708fe 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -487,8 +487,6 @@ static inline void pte_unmap(pte_t *pte) { } #define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap_nested(pte) do { } while (0) #define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr)) #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) -- cgit v1.2.3-59-g8ed1b From 0f80cad3124f986d0e46c14d46b8da06d87a2bf4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:51 +0100 Subject: arm64: Restrict ARM64_ERRATUM_1188873 mitigation to AArch32 We currently deal with ARM64_ERRATUM_1188873 by always trapping EL0 accesses for both instruction sets. Although nothing wrong comes out of that, people trying to squeeze the last drop of performance from buggy HW find this over the top. Oh well. Let's change the mitigation by flipping the counter enable bit on return to userspace. Non-broken HW gets an extra branch on the fast path, which is hopefully not the end of the world. The arch timer workaround is also removed. 
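In C-like pseudocode, the exit-to-userspace path now does roughly the following (a sketch of the assembly added to entry.S below, not literal code):

    u64 cntkctl = read_sysreg(cntkctl_el1);
    bool aarch32 = returning_to_compat_task;   /* assumed helper: derived from the saved PSTATE */

    /* trap EL0 virtual counter reads only for 32-bit tasks */
    if (aarch32 == !!(cntkctl & ARCH_TIMER_USR_VCT_ACCESS_EN)) {
        cntkctl ^= ARCH_TIMER_USR_VCT_ACCESS_EN;
        write_sysreg(cntkctl, cntkctl_el1);
    }

so 64-bit tasks keep direct counter access while 32-bit tasks on affected CPUs take the existing trap-and-emulate path.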
Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 19 +++++++++++++++++-- drivers/clocksource/arm_arch_timer.c | 15 --------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c50a7a75f2e0..1a7811b7e3c4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -336,6 +336,21 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: +#ifdef CONFIG_ARM64_ERRATUM_1188873 +alternative_if_not ARM64_WORKAROUND_1188873 + b 4f +alternative_else_nop_endif + /* + * if (x22.mode32 == cntkctl_el1.el0vcten) + * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten + */ + mrs x1, cntkctl_el1 + eon x0, x1, x22, lsr #3 + tbz x0, #1, 4f + eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN + msr cntkctl_el1, x1 +4: +#endif apply_ssbd 0, x0, x1 .endif @@ -362,11 +377,11 @@ alternative_else_nop_endif .if \el == 0 alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - bne 4f + bne 5f msr far_el1, x30 tramp_alias x30, tramp_exit_native br x30 -4: +5: tramp_alias x30, tramp_exit_compat br x30 #endif diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index aa4ec53281ce..da11a9508b77 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -319,13 +319,6 @@ static u64 notrace arm64_858921_read_cntvct_el0(void) } #endif -#ifdef CONFIG_ARM64_ERRATUM_1188873 -static u64 notrace arm64_1188873_read_cntvct_el0(void) -{ - return read_sysreg(cntvct_el0); -} -#endif - #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 /* * The low bits of the counter registers are indeterminate while bit 10 or @@ -457,14 +450,6 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = { .read_cntvct_el0 = arm64_858921_read_cntvct_el0, }, #endif -#ifdef CONFIG_ARM64_ERRATUM_1188873 - { - .match_type = ate_match_local_cap_id, - .id = (void *)ARM64_WORKAROUND_1188873, - .desc = "ARM erratum 1188873", - .read_cntvct_el0 = arm64_1188873_read_cntvct_el0, - }, -#endif #ifdef CONFIG_SUN50I_ERRATUM_UNKNOWN1 { .match_type = ate_match_dt, -- cgit v1.2.3-59-g8ed1b From c2b5bba3967a000764e9148e6f020d776b7ecd82 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:52 +0100 Subject: arm64: Make ARM64_ERRATUM_1188873 depend on COMPAT Since ARM64_ERRATUM_1188873 only affects AArch32 EL0, it makes some sense that it should depend on COMPAT. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de..560f2a860637 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -477,6 +477,7 @@ config ARM64_ERRATUM_1024718 config ARM64_ERRATUM_1188873 bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" default y + depends on COMPAT select ARM_ARCH_TIMER_OOL_WORKAROUND help This option adds work arounds for ARM Cortex-A76 erratum 1188873 -- cgit v1.2.3-59-g8ed1b From 0cf57b86859c49381addb3ce47be70aadf5fd2c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:53 +0100 Subject: arm64: Add part number for Neoverse N1 New CPU, new part number. You know the drill. 
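Once the part number is defined, affected-CPU checks elsewhere can match it in the usual way, for instance (illustrative list name only):

    static const struct midr_range neoverse_n1_cpus[] = {
        MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
        {},
    };

    if (is_midr_in_range_list(read_cpuid_id(), neoverse_n1_cpus))
        /* apply the relevant erratum handling */;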
Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5f1437099b99..2602bae334fb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -89,6 +89,7 @@ #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B +#define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define APM_CPU_PART_POTENZA 0x000 @@ -118,6 +119,7 @@ #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) +#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) -- cgit v1.2.3-59-g8ed1b From 6989303a3b2d864fd8e17d3fa3365d3e9649a598 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Apr 2019 13:03:54 +0100 Subject: arm64: Apply ARM64_ERRATUM_1188873 to Neoverse-N1 Neoverse-N1 is also affected by ARM64_ERRATUM_1188873, so let's add it to the list of affected CPUs. Signed-off-by: Marc Zyngier [will: Update silicon-errata.txt] Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 11 ++++++----- arch/arm64/kernel/cpu_errata.c | 13 +++++++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index d1e2bb801e1b..d5a124d7e242 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -61,6 +61,7 @@ stable kernels. | ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 | | ARM | Cortex-A76 | #1286807 | ARM64_ERRATUM_1286807 | +| ARM | Neoverse-N1 | #1188873 | ARM64_ERRATUM_1188873 | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 560f2a860637..fcda4e21fa8f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -475,16 +475,17 @@ config ARM64_ERRATUM_1024718 If unsure, say Y. config ARM64_ERRATUM_1188873 - bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" + bool "Cortex-A76/Neoverse-N1: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result" default y depends on COMPAT select ARM_ARCH_TIMER_OOL_WORKAROUND help - This option adds work arounds for ARM Cortex-A76 erratum 1188873 + This option adds work arounds for ARM Cortex-A76/Neoverse-N1 + erratum 1188873 - Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause - register corruption when accessing the timer registers from - AArch32 userspace. + Affected Cortex-A76/Neoverse-N1 cores (r0p0, r1p0, r2p0) could + cause register corruption when accessing the timer registers + from AArch32 userspace. If unsure, say Y. 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9950bb0cbd52..06f1c8aae1dc 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -603,6 +603,16 @@ static const struct midr_range workaround_clean_cache[] = { }; #endif +#ifdef CONFIG_ARM64_ERRATUM_1188873 +static const struct midr_range erratum_1188873_list[] = { + /* Cortex-A76 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + /* Neoverse-N1 r0p0 to r2p0 */ + MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 0, 2, 0), + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -725,10 +735,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_1188873 { - /* Cortex-A76 r0p0 to r2p0 */ .desc = "ARM erratum 1188873", .capability = ARM64_WORKAROUND_1188873, - ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), + ERRATA_MIDR_RANGE_LIST(erratum_1188873_list), }, #endif #ifdef CONFIG_ARM64_ERRATUM_1165522 -- cgit v1.2.3-59-g8ed1b From 1f5b62f09f6b314c8d70b9de5182dae4de1f94da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:01 +0100 Subject: ARM: vdso: Remove dependency with the arch_timer driver internals The VDSO code uses the kernel helper that was originally designed to abstract the access between 32 and 64bit systems. It worked so far because this function is declared as 'inline'. As we're about to revamp that part of the code, the VDSO would break. Let's fix it by doing what should have been done from the start, a proper system register access. Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/cp15.h | 2 ++ arch/arm/vdso/vgettimeofday.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index 07e27f212dc7..d2453e2d3f1f 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -68,6 +68,8 @@ #define BPIALL __ACCESS_CP15(c7, 0, c5, 6) #define ICIALLU __ACCESS_CP15(c7, 0, c5, 0) +#define CNTVCT __ACCESS_CP15_64(1, c14) + extern unsigned long cr_alignment; /* defined in entry-armv.S */ static inline unsigned long get_cr(void) diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c index a9dd619c6c29..7bdbf5d5c47d 100644 --- a/arch/arm/vdso/vgettimeofday.c +++ b/arch/arm/vdso/vgettimeofday.c @@ -18,9 +18,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -123,7 +123,8 @@ static notrace u64 get_ns(struct vdso_data *vdata) u64 cycle_now; u64 nsec; - cycle_now = arch_counter_get_cntvct(); + isb(); + cycle_now = read_sysreg(CNTVCT); cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask; -- cgit v1.2.3-59-g8ed1b From eae1ddc615bef9e1a1958a4b867a0a1678049bb3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:02 +0100 Subject: watchdog/sbsa: Use arch_timer_read_counter instead of arch_counter_get_cntvct Only arch_timer_read_counter will guarantee that workarounds are applied. So let's use this one instead of arch_counter_get_cntvct. 
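As noted above, only arch_timer_read_counter() is guaranteed to route through whatever erratum workaround the arch timer driver has registered, so out-of-driver callers such as the SBSA watchdog should read the counter with, for example:

    u64 now = arch_timer_read_counter();    /* workaround-aware counter read */

rather than calling arch_counter_get_cntvct() themselves.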
Acked-by: Mark Rutland Reviewed-by: Guenter Roeck Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/watchdog/sbsa_gwdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c index e8bd9887c566..e221e47396ab 100644 --- a/drivers/watchdog/sbsa_gwdt.c +++ b/drivers/watchdog/sbsa_gwdt.c @@ -161,7 +161,7 @@ static unsigned int sbsa_gwdt_get_timeleft(struct watchdog_device *wdd) timeleft += readl(gwdt->control_base + SBSA_GWDT_WOR); timeleft += lo_hi_readq(gwdt->control_base + SBSA_GWDT_WCV) - - arch_counter_get_cntvct(); + arch_timer_read_counter(); do_div(timeleft, gwdt->clk); -- cgit v1.2.3-59-g8ed1b From dea86a80033f8b0fb25a805f46dde9f3b1a7c23a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:03 +0100 Subject: arm64: Use arch_timer_read_counter instead of arch_counter_get_cntvct Only arch_timer_read_counter will guarantee that workarounds are applied. So let's use this one instead of arch_counter_get_cntvct. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8ad119c3f665..6190a60388cf 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -493,7 +493,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = ESR_ELx_SYS64_ISS_RT(esr); - pt_regs_write_reg(regs, rt, arch_counter_get_cntvct()); + pt_regs_write_reg(regs, rt, arch_timer_read_counter()); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } @@ -665,7 +665,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT; int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT; - u64 val = arch_counter_get_cntvct(); + u64 val = arch_timer_read_counter(); pt_regs_write_reg(regs, rt, lower_32_bits(val)); pt_regs_write_reg(regs, rt2, upper_32_bits(val)); -- cgit v1.2.3-59-g8ed1b From 5ef19a161cfa88a59508979e2f39d3d092c1d5c0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:04 +0100 Subject: clocksource/arm_arch_timer: Direcly assign set_next_event workaround When a given timer is affected by an erratum and requires an alternative implementation of set_next_event, we do a rather complicated dance to detect and call the workaround on each set_next_event call. This is clearly idiotic, as we can perfectly detect whether this CPU requires a workaround while setting up the clock event device. This only requires the CPU-specific detection to be done a bit earlier, and we can then safely override the set_next_event pointer if we have a workaround associated to that CPU. Acked-by: Mark Rutland Acked-by; Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/arch_timer.h | 4 ++++ arch/arm64/include/asm/arch_timer.h | 16 +++++++++++++ drivers/clocksource/arm_arch_timer.c | 46 +++++++----------------------------- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 0a8d7bba2cb0..3f0a0191f763 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -11,6 +11,10 @@ #include #ifdef CONFIG_ARM_ARCH_TIMER +/* 32bit ARM doesn't know anything about timer errata... 
*/ +#define has_erratum_handler(h) (false) +#define erratum_handler(h) (arch_timer_##h) + int arch_timer_arch_init(void); /* diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index f2a234d6516c..c3762ffcc933 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -31,10 +31,26 @@ #include #if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) +#define has_erratum_handler(h) \ + ({ \ + const struct arch_timer_erratum_workaround *__wa; \ + __wa = __this_cpu_read(timer_unstable_counter_workaround); \ + (__wa && __wa->h); \ + }) + +#define erratum_handler(h) \ + ({ \ + const struct arch_timer_erratum_workaround *__wa; \ + __wa = __this_cpu_read(timer_unstable_counter_workaround); \ + (__wa && __wa->h) ? __wa->h : arch_timer_##h; \ + }) + extern struct static_key_false arch_timer_read_ool_enabled; #define needs_unstable_timer_counter_workaround() \ static_branch_unlikely(&arch_timer_read_ool_enabled) #else +#define has_erratum_handler(h) false +#define erratum_handler(h) (arch_timer_##h) #define needs_unstable_timer_counter_workaround() false #endif diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index da11a9508b77..b2a88a64aab4 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -598,36 +598,12 @@ static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type t local ? "local" : "global", wa->desc); } -#define erratum_handler(fn, r, ...) \ -({ \ - bool __val; \ - if (needs_unstable_timer_counter_workaround()) { \ - const struct arch_timer_erratum_workaround *__wa; \ - __wa = __this_cpu_read(timer_unstable_counter_workaround); \ - if (__wa && __wa->fn) { \ - r = __wa->fn(__VA_ARGS__); \ - __val = true; \ - } else { \ - __val = false; \ - } \ - } else { \ - __val = false; \ - } \ - __val; \ -}) - static bool arch_timer_this_cpu_has_cntvct_wa(void) { - const struct arch_timer_erratum_workaround *wa; - - wa = __this_cpu_read(timer_unstable_counter_workaround); - return wa && wa->read_cntvct_el0; + return has_erratum_handler(read_cntvct_el0); } #else #define arch_timer_check_ool_workaround(t,a) do { } while(0) -#define erratum_set_next_event_tval_virt(...) ({BUG(); 0;}) -#define erratum_set_next_event_tval_phys(...) ({BUG(); 0;}) -#define erratum_handler(fn, r, ...) 
({false;}) #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ @@ -721,11 +697,6 @@ static __always_inline void set_next_event(const int access, unsigned long evt, static int arch_timer_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) { - int ret; - - if (erratum_handler(set_next_event_virt, ret, evt, clk)) - return ret; - set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); return 0; } @@ -733,11 +704,6 @@ static int arch_timer_set_next_event_virt(unsigned long evt, static int arch_timer_set_next_event_phys(unsigned long evt, struct clock_event_device *clk) { - int ret; - - if (erratum_handler(set_next_event_phys, ret, evt, clk)) - return ret; - set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); return 0; } @@ -762,6 +728,10 @@ static void __arch_timer_setup(unsigned type, clk->features = CLOCK_EVT_FEAT_ONESHOT; if (type == ARCH_TIMER_TYPE_CP15) { + typeof(clk->set_next_event) sne; + + arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); + if (arch_timer_c3stop) clk->features |= CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; @@ -772,20 +742,20 @@ static void __arch_timer_setup(unsigned type, case ARCH_TIMER_VIRT_PPI: clk->set_state_shutdown = arch_timer_shutdown_virt; clk->set_state_oneshot_stopped = arch_timer_shutdown_virt; - clk->set_next_event = arch_timer_set_next_event_virt; + sne = erratum_handler(set_next_event_virt); break; case ARCH_TIMER_PHYS_SECURE_PPI: case ARCH_TIMER_PHYS_NONSECURE_PPI: case ARCH_TIMER_HYP_PPI: clk->set_state_shutdown = arch_timer_shutdown_phys; clk->set_state_oneshot_stopped = arch_timer_shutdown_phys; - clk->set_next_event = arch_timer_set_next_event_phys; + sne = erratum_handler(set_next_event_phys); break; default: BUG(); } - arch_timer_check_ool_workaround(ate_match_local_cap_id, NULL); + clk->set_next_event = sne; } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; -- cgit v1.2.3-59-g8ed1b From 57f27666f91a85431492b092f5db53ecab1a0739 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:05 +0100 Subject: clocksource/arm_arch_timer: Drop use of static key in arch_timer_reg_read_stable Let's start with the removal of the arch_timer_read_ool_enabled static key in arch_timer_reg_read_stable. It is not a fast path, and we can simplify things a bit. 
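With the static key gone, the slow-path read macro boils down to (outline of the change below):

    preempt_disable_notrace();
    _val = erratum_handler(read_ ## reg)();   /* per-CPU workaround, or a plain sysreg read */
    preempt_enable_notrace();

relying on the erratum_handler() helper introduced earlier in the series to pick the right accessor.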
Acked-by: Mark Rutland Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 42 ++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index c3762ffcc933..4a06d46def7e 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -77,23 +77,37 @@ struct arch_timer_erratum_workaround { DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); +/* inline sysreg accessors that make erratum_handler() work */ +static inline notrace u32 arch_timer_read_cntp_tval_el0(void) +{ + return read_sysreg(cntp_tval_el0); +} + +static inline notrace u32 arch_timer_read_cntv_tval_el0(void) +{ + return read_sysreg(cntv_tval_el0); +} + +static inline notrace u64 arch_timer_read_cntpct_el0(void) +{ + return read_sysreg(cntpct_el0); +} + +static inline notrace u64 arch_timer_read_cntvct_el0(void) +{ + return read_sysreg(cntvct_el0); +} + #define arch_timer_reg_read_stable(reg) \ -({ \ - u64 _val; \ - if (needs_unstable_timer_counter_workaround()) { \ - const struct arch_timer_erratum_workaround *wa; \ + ({ \ + u64 _val; \ + \ preempt_disable_notrace(); \ - wa = __this_cpu_read(timer_unstable_counter_workaround); \ - if (wa && wa->read_##reg) \ - _val = wa->read_##reg(); \ - else \ - _val = read_sysreg(reg); \ + _val = erratum_handler(read_ ## reg)(); \ preempt_enable_notrace(); \ - } else { \ - _val = read_sysreg(reg); \ - } \ - _val; \ -}) + \ + _val; \ + }) /* * These register accessors are marked inline so the compiler can -- cgit v1.2.3-59-g8ed1b From a862fc2254bdbcee3b5da4f730984e5d8393a2f1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:06 +0100 Subject: clocksource/arm_arch_timer: Remove use of workaround static key The use of a static key in a hotplug path has proved to be a real nightmare, and makes it impossible to have scream-free lockdep kernel. Let's remove the static key altogether, and focus on something saner. Acked-by: Mark Rutland Acked-by: Daniel Lezcano Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 4 ---- drivers/clocksource/arm_arch_timer.c | 25 +++++++------------------ 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 4a06d46def7e..5502ea049b63 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -45,13 +45,9 @@ (__wa && __wa->h) ? 
__wa->h : arch_timer_##h; \ }) -extern struct static_key_false arch_timer_read_ool_enabled; -#define needs_unstable_timer_counter_workaround() \ - static_branch_unlikely(&arch_timer_read_ool_enabled) #else #define has_erratum_handler(h) false #define erratum_handler(h) (arch_timer_##h) -#define needs_unstable_timer_counter_workaround() false #endif enum arch_timer_erratum_match_type { diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index b2a88a64aab4..8f22976247c0 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -365,8 +365,6 @@ static u32 notrace sun50i_a64_read_cntv_tval_el0(void) DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); -DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); -EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, struct clock_event_device *clk) @@ -537,12 +535,6 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa per_cpu(timer_unstable_counter_workaround, i) = wa; } - /* - * Use the locked version, as we're called from the CPU - * hotplug framework. Otherwise, we end-up in deadlock-land. - */ - static_branch_enable_cpuslocked(&arch_timer_read_ool_enabled); - /* * Don't use the vdso fastpath if errata require using the * out-of-line counter accessor. We may change our mind pretty @@ -558,7 +550,7 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type type, void *arg) { - const struct arch_timer_erratum_workaround *wa; + const struct arch_timer_erratum_workaround *wa, *__wa; ate_match_fn_t match_fn = NULL; bool local = false; @@ -582,16 +574,13 @@ static void arch_timer_check_ool_workaround(enum arch_timer_erratum_match_type t if (!wa) return; - if (needs_unstable_timer_counter_workaround()) { - const struct arch_timer_erratum_workaround *__wa; - __wa = __this_cpu_read(timer_unstable_counter_workaround); - if (__wa && wa != __wa) - pr_warn("Can't enable workaround for %s (clashes with %s\n)", - wa->desc, __wa->desc); + __wa = __this_cpu_read(timer_unstable_counter_workaround); + if (__wa && wa != __wa) + pr_warn("Can't enable workaround for %s (clashes with %s\n)", + wa->desc, __wa->desc); - if (__wa) - return; - } + if (__wa) + return; arch_timer_enable_workaround(wa, local); pr_info("Enabling %s workaround for %s\n", -- cgit v1.2.3-59-g8ed1b From 0ea415390cd345b7d09e8c9ebd4b68adfe873043 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Apr 2019 16:49:07 +0100 Subject: clocksource/arm_arch_timer: Use arch_timer_read_counter to access stable counters Instead of always going via arch_counter_get_cntvct_stable to access the counter workaround, let's have arch_timer_read_counter point to the right method. For that, we need to track whether any CPU in the system has a workaround for the counter. This is done by having an atomic variable tracking this. 
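A minimal user-space sketch of the selection this describes (illustrative only, not the patch; the names counter_wa_in_use, read_cnt_plain, read_cnt_stable and register_counter are invented for the example):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool counter_wa_in_use;       /* set once a counter workaround is enabled */

    static unsigned long long read_cnt_plain(void)
    {
            return 100;                         /* stands in for read_sysreg(cntvct_el0) */
    }

    static unsigned long long read_cnt_stable(void)
    {
            /* would go through the per-CPU workaround table in the real driver */
            return 100;
    }

    static unsigned long long (*read_counter)(void) = read_cnt_plain;

    static void enable_counter_workaround(void)
    {
            atomic_store(&counter_wa_in_use, true);
    }

    static void register_counter(void)
    {
            /* decide once, at registration, which accessor every later caller uses */
            read_counter = atomic_load(&counter_wa_in_use) ? read_cnt_stable
                                                           : read_cnt_plain;
    }

    int main(void)
    {
            enable_counter_workaround();
            register_counter();
            printf("counter: %llu\n", read_counter());
            return 0;
    }

The cost of checking for a workaround is paid once when the counter is registered, not on every read.
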
Acked-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/include/asm/arch_timer.h | 14 +++++++++-- arch/arm64/include/asm/arch_timer.h | 16 ++++++++++-- drivers/clocksource/arm_arch_timer.c | 48 +++++++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 3f0a0191f763..4b66ecd6be99 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -83,7 +83,7 @@ static inline u32 arch_timer_get_cntfrq(void) return val; } -static inline u64 arch_counter_get_cntpct(void) +static inline u64 __arch_counter_get_cntpct(void) { u64 cval; @@ -92,7 +92,12 @@ static inline u64 arch_counter_get_cntpct(void) return cval; } -static inline u64 arch_counter_get_cntvct(void) +static inline u64 __arch_counter_get_cntpct_stable(void) +{ + return __arch_counter_get_cntpct(); +} + +static inline u64 __arch_counter_get_cntvct(void) { u64 cval; @@ -101,6 +106,11 @@ static inline u64 arch_counter_get_cntvct(void) return cval; } +static inline u64 __arch_counter_get_cntvct_stable(void) +{ + return __arch_counter_get_cntvct(); +} + static inline u32 arch_timer_get_cntkctl(void) { u32 cntkctl; diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 5502ea049b63..48b2100f4aaa 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -174,18 +174,30 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl) isb(); } -static inline u64 arch_counter_get_cntpct(void) +static inline u64 __arch_counter_get_cntpct_stable(void) { isb(); return arch_timer_reg_read_stable(cntpct_el0); } -static inline u64 arch_counter_get_cntvct(void) +static inline u64 __arch_counter_get_cntpct(void) +{ + isb(); + return read_sysreg(cntpct_el0); +} + +static inline u64 __arch_counter_get_cntvct_stable(void) { isb(); return arch_timer_reg_read_stable(cntvct_el0); } +static inline u64 __arch_counter_get_cntvct(void) +{ + isb(); + return read_sysreg(cntvct_el0); +} + static inline int arch_timer_arch_init(void) { return 0; diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 8f22976247c0..27acc9eb0f7c 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -152,6 +152,26 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, return val; } +static u64 arch_counter_get_cntpct_stable(void) +{ + return __arch_counter_get_cntpct_stable(); +} + +static u64 arch_counter_get_cntpct(void) +{ + return __arch_counter_get_cntpct(); +} + +static u64 arch_counter_get_cntvct_stable(void) +{ + return __arch_counter_get_cntvct_stable(); +} + +static u64 arch_counter_get_cntvct(void) +{ + return __arch_counter_get_cntvct(); +} + /* * Default to cp15 based access because arm64 uses this function for * sched_clock() before DT is probed and the cp15 method is guaranteed @@ -365,6 +385,7 @@ static u32 notrace sun50i_a64_read_cntv_tval_el0(void) DEFINE_PER_CPU(const struct arch_timer_erratum_workaround *, timer_unstable_counter_workaround); EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); +static atomic_t timer_unstable_counter_workaround_in_use = ATOMIC_INIT(0); static void erratum_set_next_event_tval_generic(const int access, unsigned long evt, struct clock_event_device *clk) @@ -535,6 +556,9 @@ void arch_timer_enable_workaround(const struct arch_timer_erratum_workaround *wa per_cpu(timer_unstable_counter_workaround, i) = wa; } + 
if (wa->read_cntvct_el0 || wa->read_cntpct_el0) + atomic_set(&timer_unstable_counter_workaround_in_use, 1); + /* * Don't use the vdso fastpath if errata require using the * out-of-line counter accessor. We may change our mind pretty @@ -591,9 +615,15 @@ static bool arch_timer_this_cpu_has_cntvct_wa(void) { return has_erratum_handler(read_cntvct_el0); } + +static bool arch_timer_counter_has_wa(void) +{ + return atomic_read(&timer_unstable_counter_workaround_in_use); +} #else #define arch_timer_check_ool_workaround(t,a) do { } while(0) #define arch_timer_this_cpu_has_cntvct_wa() ({false;}) +#define arch_timer_counter_has_wa() ({false;}) #endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ static __always_inline irqreturn_t timer_handler(const int access, @@ -942,12 +972,22 @@ static void __init arch_counter_register(unsigned type) /* Register the CP15 based counter if we have one */ if (type & ARCH_TIMER_TYPE_CP15) { + u64 (*rd)(void); + if ((IS_ENABLED(CONFIG_ARM64) && !is_hyp_mode_available()) || - arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) - arch_timer_read_counter = arch_counter_get_cntvct; - else - arch_timer_read_counter = arch_counter_get_cntpct; + arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) { + if (arch_timer_counter_has_wa()) + rd = arch_counter_get_cntvct_stable; + else + rd = arch_counter_get_cntvct; + } else { + if (arch_timer_counter_has_wa()) + rd = arch_counter_get_cntpct_stable; + else + rd = arch_counter_get_cntpct; + } + arch_timer_read_counter = rd; clocksource_counter.archdata.vdso_direct = vdso_default; } else { arch_timer_read_counter = arch_counter_get_cntvct_mem; -- cgit v1.2.3-59-g8ed1b From 61cf61d81e326163ce1557ceccfca76e11d0e57c Mon Sep 17 00:00:00 2001 From: Arun KS Date: Tue, 30 Apr 2019 16:05:04 +0530 Subject: arm64: Fix size of __early_cpu_boot_status __early_cpu_boot_status is of type long. Use quad assembler directive to allocate proper size. Acked-by: Mark Rutland Signed-off-by: Arun KS Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index f533305849d7..fcae3f85c6cd 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -684,7 +684,7 @@ ENTRY(__boot_cpu_mode) * with MMU turned off. */ ENTRY(__early_cpu_boot_status) - .long 0 + .quad 0 .popsection -- cgit v1.2.3-59-g8ed1b From 526e065dbca6df0b5a130b84b836b8b3c9f54e21 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Mon, 15 Apr 2019 16:21:28 -0500 Subject: arm64: add sysfs vulnerability show for speculative store bypass Return status based on ssbd_state and __ssb_safe. If the mitigation is disabled, or the firmware isn't responding then return the expected machine state based on a whitelist of known good cores. Given a heterogeneous machine, the overall machine vulnerability defaults to safe but is reset to unsafe when we miss the whitelist and the firmware doesn't explicitly tell us the core is safe. In order to make that work we delay transitioning to vulnerable until we know the firmware isn't responding to avoid a case where we miss the whitelist, but the firmware goes ahead and reports the core is not vulnerable. If all the cores in the machine have SSBS, then __ssb_safe will remain true. 
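A simplified model of how the per-CPU verdicts fold into the machine-wide state reported through sysfs (illustrative sketch only, not the patch; demo_ssb_safe, cpu_on_whitelist and firmware_says_mitigate are invented names, and the real code consults MIDR ranges and the SMCCC firmware interface):

    #include <stdbool.h>
    #include <stdio.h>

    static bool demo_ssb_safe = true;   /* machine-wide, defaults to "not affected" */
    static bool mitigation_active;

    static void check_one_cpu(bool cpu_on_whitelist, bool firmware_says_mitigate)
    {
            if (firmware_says_mitigate) {
                    /* firmware answered: this core needs (and gets) the mitigation */
                    demo_ssb_safe = false;
                    mitigation_active = true;
                    return;
            }
            /* no firmware answer: only the whitelist can keep the machine "safe" */
            if (!cpu_on_whitelist)
                    demo_ssb_safe = false;
    }

    static const char *show_spec_store_bypass(void)
    {
            if (demo_ssb_safe)
                    return "Not affected\n";
            if (mitigation_active)
                    return "Mitigation: Speculative Store Bypass disabled via prctl\n";
            return "Vulnerable\n";
    }

    int main(void)
    {
            check_one_cpu(true, false);     /* whitelisted core, silent firmware */
            check_one_cpu(false, true);     /* core that requires the firmware mitigation */
            printf("%s", show_spec_store_bypass());
            return 0;
    }
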
Tested-by: Stefan Wahren Signed-off-by: Jeremy Linton Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b6132783e8cf..4bb0f7cad418 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -278,6 +278,7 @@ static int detect_harden_bp_fw(void) DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; +static bool __ssb_safe = true; static const struct ssbd_options { const char *str; @@ -381,6 +382,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, struct arm_smccc_res res; bool required = true; s32 val; + bool this_cpu_safe = false; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); @@ -389,8 +391,14 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } + /* delay setting __ssb_safe until we get a firmware response */ + if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) + this_cpu_safe = true; + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -407,6 +415,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -415,14 +425,18 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (val) { case SMCCC_RET_NOT_SUPPORTED: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; + /* machines with mixed mitigation requirements must not return this */ case SMCCC_RET_NOT_REQUIRED: pr_info_once("%s mitigation not required\n", entry->desc); ssbd_state = ARM64_SSBD_MITIGATED; return false; case SMCCC_RET_SUCCESS: + __ssb_safe = false; required = true; break; @@ -432,6 +446,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: WARN_ON(1); + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -472,6 +488,14 @@ out_printmsg: return required; } +/* known invulnerable cores */ +static const struct midr_range arm64_ssb_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + {}, +}; + static void __maybe_unused cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { @@ -767,6 +791,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_SSBD, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_ssbd_mitigation, + .midr_range_list = arm64_ssb_cpus, }, #ifdef CONFIG_ARM64_ERRATUM_1188873 { @@ -805,3 +830,20 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (__ssb_safe) + return sprintf(buf, "Not affected\n"); + + switch (ssbd_state) { + case ARM64_SSBD_KERNEL: + case ARM64_SSBD_FORCE_ENABLE: + if (IS_ENABLED(CONFIG_ARM64_SSBD)) + return sprintf(buf, + "Mitigation: Speculative Store Bypass disabled via prctl\n"); + } + + return sprintf(buf, "Vulnerable\n"); +} -- cgit v1.2.3-59-g8ed1b From 61ae1321f06c4489c724c803e9b8363dea576da3 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Mon, 15 Apr 2019 16:21:29 -0500 Subject: arm64: enable 
generic CPU vulnerabilites support Enable CPU vulnerabilty show functions for spectre_v1, spectre_v2, meltdown and store-bypass. Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Jeremy Linton Reviewed-by: Andre Przywara Reviewed-by: Catalin Marinas Tested-by: Stefan Wahren Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7e34b9eba5de..6a7b7d4e0e90 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -90,6 +90,7 @@ config ARM64 select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_MULTI_HANDLER -- cgit v1.2.3-59-g8ed1b From eb337cdfcd5dd3b10522c2f34140a73a4c285c30 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 30 Apr 2019 16:58:56 +0100 Subject: arm64: ssbs: Don't treat CPUs with SSBS as unaffected by SSB SSBS provides a relatively cheap mitigation for SSB, but it is still a mitigation and its presence does not indicate that the CPU is unaffected by the vulnerability. Tweak the mitigation logic so that we report the correct string in sysfs. Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4bb0f7cad418..44ef98be001e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -386,15 +386,17 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + /* delay setting __ssb_safe until we get a firmware response */ + if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) + this_cpu_safe = true; + if (this_cpu_has_cap(ARM64_SSBS)) { + if (!this_cpu_safe) + __ssb_safe = false; required = false; goto out_printmsg; } - /* delay setting __ssb_safe until we get a firmware response */ - if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) - this_cpu_safe = true; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; if (!this_cpu_safe) -- cgit v1.2.3-59-g8ed1b From a111b7c0f20e13b54df2fa959b3dc0bdf1925ae6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 15:39:32 -0500 Subject: arm64/speculation: Support 'mitigations=' cmdline option Configure arm64 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, and Speculative Store Bypass. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf [will: reorder checks so KASLR implies KPTI and SSBS is affected by cmdline] Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.txt | 8 +++++--- arch/arm64/kernel/cpu_errata.c | 6 +++++- arch/arm64/kernel/cpufeature.c | 8 +++++++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6a929258faf7..ce226f7ee566 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2545,8 +2545,8 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86,PPC,S390] Control optional mitigations for CPU - vulnerabilities. This is a set of curated, + [X86,PPC,S390,ARM64] Control optional mitigations for + CPU vulnerabilities. 
This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2555,11 +2555,13 @@ improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] + kpti=0 [ARM64] nospectre_v1 [PPC] nobp=0 [S390] - nospectre_v2 [X86,PPC,S390] + nospectre_v2 [X86,PPC,S390,ARM64] spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] + ssbd=force-off [ARM64] l1tf=off [X86] auto (default) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 44ef98be001e..1b9ce0fdd81d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -386,6 +387,9 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (cpu_mitigations_off()) + ssbd_state = ARM64_SSBD_FORCE_DISABLE; + /* delay setting __ssb_safe until we get a firmware response */ if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) this_cpu_safe = true; @@ -589,7 +593,7 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) } /* forced off */ - if (__nospectre_v2) { + if (__nospectre_v2 || cpu_mitigations_off()) { pr_info_once("spectrev2 mitigation disabled by command line option\n"); __hardenbp_enab = false; return false; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 703ee8564fbd..f3b32d88f165 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -966,7 +967,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), { /* sentinel */ } }; - char const *str = "command line option"; + char const *str = "kpti command line option"; bool meltdown_safe; meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); @@ -996,6 +997,11 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } } + if (cpu_mitigations_off() && !__kpti_forced) { + str = "mitigations=off"; + __kpti_forced = -1; + } + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { pr_info_once("kernel page table isolation disabled by kernel configuration\n"); return false; -- cgit v1.2.3-59-g8ed1b From 4ad499c94264a2ee05aacc518b9bde658318e510 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 12 Apr 2019 22:56:21 -0500 Subject: Documentation: Add ARM64 to kernel-parameters.rst Add ARM64 to the legend of architectures. It's already used in several places in kernel-parameters.txt. Suggested-by: Randy Dunlap Signed-off-by: Josh Poimboeuf Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/admin-guide/kernel-parameters.rst b/Documentation/admin-guide/kernel-parameters.rst index b8d0bc07ed0a..0124980dca2d 100644 --- a/Documentation/admin-guide/kernel-parameters.rst +++ b/Documentation/admin-guide/kernel-parameters.rst @@ -88,6 +88,7 @@ parameter is applicable:: APIC APIC support is enabled. APM Advanced Power Management support is enabled. ARM ARM architecture is enabled. + ARM64 ARM64 architecture is enabled. AX25 Appropriate AX.25 support is enabled. CLK Common clock infrastructure is enabled. CMA Contiguous Memory Area support is enabled. -- cgit v1.2.3-59-g8ed1b