From 48f99c8ec0b25756d0283ab058826ae07d14fad7 Mon Sep 17 00:00:00 2001 From: Dong Bo Date: Tue, 25 Apr 2017 14:11:29 +0800 Subject: arm64: Preventing READ_IMPLIES_EXEC propagation Like arch/arm/, we inherit the READ_IMPLIES_EXEC personality flag across fork(). This is undesirable for a number of reasons: * ELF files that don't require executable stack can end up with it anyway * We end up performing unnecessary I-cache maintenance when mapping what should be non-executable pages * Restricting what is executable is generally desirable when defending against overflow attacks This patch clears the personality flag when setting up the personality for newly spawned native tasks. Given that semi-recent AArch64 toolchains emit a non-executable PT_GNU_STACK header, userspace applications can already not rely on READ_IMPLIES_EXEC, so they shouldn't be adversely affected by this change. Cc: Reported-by: Peter Maydell Signed-off-by: Dong Bo [will: added comment to compat code, rewrote commit message] Signed-off-by: Will Deacon --- arch/arm64/include/asm/elf.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 5d1700425efe..ac3fb7441510 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -142,6 +142,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; ({ \ clear_bit(TIF_32BIT, &current->mm->context.flags); \ clear_thread_flag(TIF_32BIT); \ + current->personality &= ~READ_IMPLIES_EXEC; \ }) /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ @@ -187,6 +188,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ((x)->e_flags & EF_ARM_EABI_MASK)) #define compat_start_thread compat_start_thread +/* + * Unlike the native SET_PERSONALITY macro, the compat version inherits + * READ_IMPLIES_EXEC across a fork() since this is the behaviour on + * arch/arm/. + */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ set_bit(TIF_32BIT, &current->mm->context.flags); \ -- cgit v1.2.3-59-g8ed1b From c07ab957d9af8092fc3caf3db5a0fb49098fdc65 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 9 May 2017 09:53:36 +0800 Subject: arm64: Call __show_regs directly Generic code expects show_regs() to also dump the stack, but arm64's show_regs() does not do this. Some arm64 callers of show_regs() *only* want the registers dumped, without the stack. To enable generic code to work as expected, we need to make show_regs() dump the stack. Where we only want the registers dumped, we must use __show_regs(). This patch updates code to use __show_regs() where only registers are desired. A subsequent patch will modify show_regs().
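For reference, the end state implemented by the follow-up patch is sketched below: __show_regs() keeps the register-only dump, and show_regs() layers the stack dump on top of it.

void show_regs(struct pt_regs *regs)
{
	__show_regs(regs);		/* registers only */
	dump_backtrace(regs, NULL);	/* stack dump, added by the next patch */
}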
Acked-by: Mark Rutland Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 4 ++-- arch/arm64/mm/fault.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c5c45942fb6e..d849d9804011 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -522,9 +522,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) pr_err("current sp %lx does not match saved sp %lx\n", orig_sp, stack_addr); pr_err("Saved registers for jprobe %p\n", jp); - show_regs(saved_regs); + __show_regs(saved_regs); pr_err("Current registers\n"); - show_regs(regs); + __show_regs(regs); BUG(); } unpause_graph_tracing(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 37b95dff0b07..c3f2b1048f83 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -250,7 +250,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); show_pte(tsk->mm, addr); - show_regs(regs); + __show_regs(regs); } tsk->thread.fault_address = addr; -- cgit v1.2.3-59-g8ed1b From 1149aad10b1e2f2cf1ea023889ac8604ae869c5a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 9 May 2017 09:53:37 +0800 Subject: arm64: Add dump_backtrace() in show_regs Generic code expects show_regs() to dump the stack, but arm64's show_regs() does not. This makes it hard to debug softlockups and other issues that result in show_regs() being called. This patch updates arm64's show_regs() to dump the stack, as common code expects. Acked-by: Mark Rutland Signed-off-by: Kefeng Wang [will: folded in bug_handler fix from mrutland] Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/traps.c | 4 +--- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 801a16dbbdf6..5b6eafccc5d8 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -30,5 +30,6 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); +extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ae2a835898d7..af1ea258c212 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -210,6 +210,7 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { __show_regs(regs); + dump_backtrace(regs, NULL); } static void tls_thread_flush(void) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0805b44f986a..3ebfb1d00b53 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -140,7 +140,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } -static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; unsigned long irq_stack_ptr; @@ -728,8 +728,6 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr) break; case BUG_TRAP_TYPE_WARN: - /* Ideally, report_bug() should backtrace for us... but no. 
*/ - dump_backtrace(regs, NULL); break; default: -- cgit v1.2.3-59-g8ed1b From 6efd8499d9bda657fc82621b24d6122a01332c19 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 15 May 2017 13:40:20 +0200 Subject: arm64: mm: explicitly include linux/vmalloc.h arm64's mm/mmu.c uses vm_area_add_early, struct vm_struct and other definitions but relies on implicit inclusion of linux/vmalloc.h, which means that changes in other headers could break the build. Thus, add an explicit include. Acked-by: Ard Biesheuvel Signed-off-by: Tobias Klauser Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c429ec6fde8..23c2d89a362e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -31,6 +31,7 @@ #include #include #include +#include <linux/vmalloc.h> #include #include -- cgit v1.2.3-59-g8ed1b From 3fde2999fac549024a56353966844ac7633b74ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 May 2017 16:10:34 +0100 Subject: arm64: cpufeature: Don't dump useless backtrace on CPU_OUT_OF_SPEC Unfortunately, it turns out that mismatched CPU features in big.LITTLE systems are starting to appear in the wild. Whilst we should continue to taint the kernel with CPU_OUT_OF_SPEC for features that differ in ways that we can't fix up, dumping a useless backtrace out of the cpufeature code is pointless and irritating. This patch removes the backtrace from the taint. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 817ce3365e20..22f554320581 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -639,8 +639,8 @@ void update_cpu_features(int cpu, * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ - WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation.\n"); + pr_warn_once("Unsupported CPU feature variation detected.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); } u64 read_sanitised_ftr_reg(u32 id) -- cgit v1.2.3-59-g8ed1b From 690e95dd4d67be2eb905e1480b692c84e612a71a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 16 May 2017 15:36:16 +0800 Subject: arm64: check return value of of_flat_dt_get_machine_name It's useless to print the machine name and set up arch-specific system identifiers if of_flat_dt_get_machine_name() returns NULL, as is the case when booting via ACPI.
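A minimal sketch of the resulting behaviour (the full diff follows below): without the check, an ACPI boot, which has no flattened-DT model string, would log the confusing line "Machine model: (null)", since printk renders a NULL %s argument as "(null)".

	name = of_flat_dt_get_machine_name();
	if (!name)
		return;		/* e.g. ACPI boot: no DT machine name to report */

	pr_info("Machine model: %s\n", name);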
Reviewed-by: Geert Uytterhoeven Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2c822ef94f34..d4b740538ad5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -194,6 +194,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) } name = of_flat_dt_get_machine_name(); + if (!name) + return; + pr_info("Machine model: %s\n", name); dump_stack_set_arch_desc("%s (DT)", name); } -- cgit v1.2.3-59-g8ed1b From 5f16a046f8e144c294ef98cd29d9458b5f8273e5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Apr 2017 11:14:05 +0100 Subject: arm64: futex: Fix undefined behaviour with FUTEX_OP_OPARG_SHIFT usage FUTEX_OP_OPARG_SHIFT instructs the futex code to treat the 12-bit oparg field as a shift value, potentially leading to a left shift value that is negative or with an absolute value that is significantly larger than the size of the type. UBSAN chokes with: ================================================================================ UBSAN: Undefined behaviour in ./arch/arm64/include/asm/futex.h:60:13 shift exponent -1 is negative CPU: 1 PID: 1449 Comm: syz-executor0 Not tainted 4.11.0-rc4-00005-g977eb52-dirty #11 Hardware name: linux,dummy-virt (DT) Call trace: [] dump_backtrace+0x0/0x538 arch/arm64/kernel/traps.c:73 [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:228 [] __dump_stack lib/dump_stack.c:16 [inline] [] dump_stack+0x120/0x188 lib/dump_stack.c:52 [] ubsan_epilogue+0x18/0x98 lib/ubsan.c:164 [] __ubsan_handle_shift_out_of_bounds+0x250/0x294 lib/ubsan.c:421 [] futex_atomic_op_inuser arch/arm64/include/asm/futex.h:60 [inline] [] futex_wake_op kernel/futex.c:1489 [inline] [] do_futex+0x137c/0x1740 kernel/futex.c:3231 [] SYSC_futex kernel/futex.c:3281 [inline] [] SyS_futex+0x114/0x268 kernel/futex.c:3249 [] el0_svc_naked+0x24/0x28 ================================================================================ syz-executor1 uses obsolete (PF_INET,SOCK_PACKET) sock: process `syz-executor0' is using obsolete setsockopt SO_BSDCOMPAT This patch attempts to fix some of this by: * Making encoded_op an unsigned type, so we can shift it left even if the top bit is set. * Casting to signed prior to shifting right when extracting oparg and cmparg. * Considering only the bottom 5 bits of oparg when using it as a left-shift value. Whilst I think this catches all of the issues, I'd much prefer to remove this stuff, as I think it's unused and the bugs are copy-pasted between a bunch of architectures.
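A worked example of the fixed extraction, assuming a 32-bit int (the value is illustrative only):

	unsigned int encoded_op = 0x90000001;	/* top bit set */

	/*
	 * Left-shifting the unsigned value is well-defined:
	 * 0x90000001 << 8 == 0x00000100, whereas the same shift on a
	 * signed int with the sign bit set was undefined behaviour.
	 */
	int oparg  = (int)(encoded_op << 8) >> 20;	/* bits 12..23 -> 0 */
	int cmparg = (int)(encoded_op << 20) >> 20;	/* bits 0..11  -> 1 */

	/*
	 * Masking with 0x1f keeps the shift within 0..31 even when
	 * userspace passes a negative or oversized oparg.
	 */
	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
		oparg = 1U << (oparg & 0x1f);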
Reviewed-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 85c4a8981d47..f32b42e8725d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,16 +48,16 @@ do { \ } while (0) static inline int -futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (int)(encoded_op << 8) >> 20; + int cmparg = (int)(encoded_op << 20) >> 20; int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; + oparg = 1U << (oparg & 0x1f); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; -- cgit v1.2.3-59-g8ed1b From db46a72b9713fd20c405e796d7ef841f6d9bd15f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 24 May 2017 18:22:19 +0100 Subject: ARM64/PCI: Set root bus NUMA node on ACPI systems PCI core requires the NUMA node for the struct pci_host_bridge.dev to be set by using the pcibus_to_node(struct pci_bus*) API, which on ARM64 systems relies on the struct pci_host_bridge->bus.dev NUMA node. The struct pci_host_bridge.dev NUMA node is then propagated through the PCI device hierarchy as PCI devices (and bridges) are enumerated under it. Therefore, in order to set up the PCI NUMA hierarchy appropriately, the struct pci_host_bridge->bus.dev NUMA node must be set before core code calls pcibus_to_node(struct pci_bus*) on it so that PCI core can retrieve the NUMA node for the struct pci_host_bridge.dev device and can propagate it through the PCI bus tree. On ARM64 ACPI-based systems the struct pci_host_bridge->bus.dev NUMA node can be set up in pcibios_root_bridge_prepare() by parsing the root bridge ACPI device firmware binding. Add code to pcibios_root_bridge_prepare() that, when booting with ACPI, parses the root bridge ACPI device companion NUMA binding and sets the corresponding struct pci_host_bridge->bus.dev NUMA node appropriately. Cc: Bjorn Helgaas Cc: Catalin Marinas Reviewed-by: Robert Richter Tested-by: Robert Richter Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- arch/arm64/kernel/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..108283443336 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -108,7 +108,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) if (!acpi_disabled) { struct pci_config_window *cfg = bridge->bus->sysdata; struct acpi_device *adev = to_acpi_device(cfg->parent); + struct device *bus_dev = &bridge->bus->dev; + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); } return 0; -- cgit v1.2.3-59-g8ed1b From fe7296e19221c6dc125a06b52e28ccbdb76d9b58 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 24 May 2017 15:43:18 +0800 Subject: arm64: perf: Extend event config for ARMv8.1 Perf has supported the ARMv8.1 feature with a 16-bit evtCount field [see c210ae8 arm64: perf: Extend event mask for ARMv8.1], so the event config should be extended to 16 bits too; otherwise, using -e event_name with an event_code greater than 0x3ff makes pmu_config_term() return -EINVAL, because pmu_format_max_value() depends on the event config.
This patch extends the event config to 16 bits. Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 83a1b1ad189f..b5798ba21189 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -529,7 +529,7 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { .is_visible = armv8pmu_event_attr_is_visible, }; -PMU_FORMAT_ATTR(event, "config:0-9"); +PMU_FORMAT_ATTR(event, "config:0-15"); static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, -- cgit v1.2.3-59-g8ed1b From 1151f838cb626005f4d69bf675dacaaa5ea909d6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 May 2017 16:42:00 +0100 Subject: arm64: kernel: restrict /dev/mem read() calls to linear region When running lscpu on an AArch64 system that has SMBIOS version 2.0 tables, it will segfault in the following way: Unable to handle kernel paging request at virtual address ffff8000bfff0000 pgd = ffff8000f9615000 [ffff8000bfff0000] *pgd=0000000000000000 Internal error: Oops: 96000007 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 1284 Comm: lscpu Not tainted 4.11.0-rc3+ #103 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 task: ffff8000fa78e800 task.stack: ffff8000f9780000 PC is at __arch_copy_to_user+0x90/0x220 LR is at read_mem+0xcc/0x140 This is caused by the fact that lscpu issues a read() on /dev/mem at the offset where it expects to find the SMBIOS structure array. However, this region is classified as EFI_RUNTIME_SERVICES_DATA (as per the UEFI spec), and so it is omitted from the linear mapping. So let's restrict /dev/mem read/write access to those areas that are covered by the linear region. Reported-by: Alexander Graf Fixes: 4dffbfc48d65 ("arm64/efi: mark UEFI reserved regions as MEMBLOCK_NOMAP") Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/mm/mmap.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 7b0d55756eb1..adc208c2ae9c 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -18,6 +18,7 @@ #include #include +#include <linux/memblock.h> #include #include #include @@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ int valid_phys_addr_range(phys_addr_t addr, size_t size) { - if (addr < PHYS_OFFSET) - return 0; - if (addr + size > __pa(high_memory - 1) + 1) - return 0; - - return 1; + /* + * Check whether addr is covered by a memory region without the + * MEMBLOCK_NOMAP attribute, and whether that region covers the + * entire range. In theory, this could lead to false negatives + * if the range is covered by distinct but adjacent memory regions + * that only differ in other attributes. However, few of such + * attributes have been defined, and it is debatable whether it + * follows that /dev/mem read() calls should be able to traverse + * such boundaries.
+ */ + return memblock_is_region_memory(addr, size) && + memblock_is_map_memory(addr); } /* -- cgit v1.2.3-59-g8ed1b From 8dd0ee651d8aefdc2d8ae0fcc9c68dfc943c9e4c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Jun 2017 11:40:23 +0100 Subject: arm64: cpufeature: Fix CPU_OUT_OF_SPEC taint for uniform systems Commit 3fde2999fac5 ("arm64: cpufeature: Don't dump useless backtrace on CPU_OUT_OF_SPEC") changed the cpufeature detection code to use add_taint instead of WARN_TAINT_ONCE when detecting a heterogeneous system with mismatched feature support. Unfortunately, this resulted in all systems getting the taint, regardless of any feature mismatch. This patch fixes the problem by conditionalising the taint on detecting a feature mismatch. Acked-by: Mark Rutland Reported-by: Heiner Kallweit Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 22f554320581..55d5c72a507d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -639,8 +639,10 @@ void update_cpu_features(int cpu, * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ - pr_warn_once("Unsupported CPU feature variation detected.\n"); - add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + if (taint) { + pr_warn_once("Unsupported CPU feature variation detected.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } } u64 read_sanitised_ftr_reg(u32 id) -- cgit v1.2.3-59-g8ed1b From dbbb08f500d6146398b794fdc68a8e811366b451 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Jun 2017 21:52:30 -0700 Subject: arm64, vdso: Define vdso_{start,end} as array Adjust vdso_{start|end} to be char arrays to avoid compile-time analysis that flags "too large" memcmp() calls with CONFIG_FORTIFY_SOURCE. Cc: Jisheng Zhang Acked-by: Catalin Marinas Suggested-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..ae35f1855e06 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -37,7 +37,7 @@ #include #include -extern char vdso_start, vdso_end; +extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; /* @@ -125,14 +125,14 @@ static int __init vdso_init(void) struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } - vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -145,7 +145,7 @@ static int __init vdso_init(void) /* Grab the vDSO code pages. 
*/ - pfn = sym_to_pfn(&vdso_start); + pfn = sym_to_pfn(vdso_start); for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(pfn + i); -- cgit v1.2.3-59-g8ed1b From f8af0b364e249eef0c71200826563947cd74267e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Jun 2017 17:00:21 +0000 Subject: arm64: ftrace: don't validate branch via PLT in ftrace_make_nop() When turning branch instructions into NOPs, we attempt to validate the action by comparing the old value at the call site with the opcode of a direct relative branch instruction pointing at the old target. However, these call sites are statically initialized to call _mcount(), and may be redirected via a PLT entry if the module is loaded far away from the kernel text, leading to false negatives and spurious errors. So skip the validation if CONFIG_ARM64_MODULE_PLTS is configured. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/ftrace.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 40ad08ac569a..4cb576374b82 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -84,12 +84,52 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - u32 old, new; + long offset = (long)pc - (long)addr; + bool validate = true; + u32 old = 0, new; + + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + (offset < -SZ_128M || offset >= SZ_128M)) { + u32 replaced; + + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + */ + if (!mod) { + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + } + + /* + * The instruction we are about to patch may be a branch and + * link instruction that was redirected via a PLT entry. In + * this case, the normal validation will fail, but we can at + * least check that we are dealing with a branch and link + * instruction that points into the right module. + */ + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (!aarch64_insn_is_bl(replaced) || + !within_module(pc + aarch64_get_branch_offset(replaced), + mod)) + return -EINVAL; + + validate = false; + } else { + old = aarch64_insn_gen_branch_imm(pc, addr, + AARCH64_INSN_BRANCH_LINK); + } - old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_nop(); - return ftrace_modify_code(pc, old, new, true); + return ftrace_modify_code(pc, old, new, validate); } void arch_ftrace_update_code(int command) -- cgit v1.2.3-59-g8ed1b From e71a4e1bebaf7fd990efbdc04b38e5526914f0f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Jun 2017 17:00:22 +0000 Subject: arm64: ftrace: add support for far branches to dynamic ftrace Currently, dynamic ftrace support in the arm64 kernel assumes that all core kernel code is within range of ordinary branch instructions that occur in module code, which is usually the case, but is no longer guaranteed now that we have support for module PLTs and address space randomization. 
Since on arm64, all patching of branch instructions involves function calls to the same entry point [ftrace_caller()], we can emit the modules with a trampoline that has unlimited range, and patch both the trampoline itself and the branch instruction to redirect the call via the trampoline. Signed-off-by: Ard Biesheuvel [will: minor clarification to smp_wmb() comment] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- arch/arm64/Makefile | 3 +++ arch/arm64/include/asm/module.h | 3 +++ arch/arm64/kernel/Makefile | 3 +++ arch/arm64/kernel/ftrace-mod.S | 18 +++++++++++++++ arch/arm64/kernel/ftrace.c | 51 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/module.c | 6 ++++- 7 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kernel/ftrace-mod.S diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..22f769b254b4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -982,7 +982,7 @@ config RANDOMIZE_BASE config RANDOMIZE_MODULE_REGION_FULL bool "Randomize the module region independently from the core kernel" - depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE + depends on RANDOMIZE_BASE default y help Randomizes the location of the module region without considering the diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f839ecd919f9..1ce57b42f390 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -70,6 +70,9 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +ifeq ($(CONFIG_DYNAMIC_FTRACE),y) +KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o +endif endif # Default value diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index d57693f5d4ec..19bd97671bb8 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -30,6 +30,9 @@ struct mod_plt_sec { struct mod_arch_specific { struct mod_plt_sec core; struct mod_plt_sec init; + + /* for CONFIG_DYNAMIC_FTRACE */ + void *ftrace_trampoline; }; #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1dcb69d3d0e5..f2b4e816b6de 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -62,3 +62,6 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif + +# will be included by each individual module but not by the core kernel itself +extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S new file mode 100644 index 000000000000..00c4025be4ff --- /dev/null +++ b/arch/arm64/kernel/ftrace-mod.S @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2017 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include + + .section ".text.ftrace_trampoline", "ax" + .align 3 +0: .quad 0 +__ftrace_trampoline: + ldr x16, 0b + br x16 +ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 4cb576374b82..8a42be0693c9 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -10,10 +10,12 @@ */ #include +#include #include #include #include +#include #include #include @@ -69,8 +71,57 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; + long offset = (long)pc - (long)addr; u32 old, new; + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + (offset < -SZ_128M || offset >= SZ_128M)) { + unsigned long *trampoline; + struct module *mod; + + /* + * On kernels that support module PLTs, the offset between the + * branch instruction and its target may legally exceed the + * range of an ordinary relative 'bl' opcode. In this case, we + * need to branch via a trampoline in the module. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. + */ + trampoline = (unsigned long *)mod->arch.ftrace_trampoline; + if (trampoline[0] != addr) { + if (trampoline[0] != 0) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + + /* point the trampoline to our ftrace entry point */ + module_disable_ro(mod); + trampoline[0] = addr; + module_enable_ro(mod, true); + + /* update trampoline before patching in the branch */ + smp_wmb(); + } + addr = (unsigned long)&trampoline[1]; + } + old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f035ff6fb223..8c3a7264fb0f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -420,8 +420,12 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { apply_alternatives((void *)s->sh_addr, s->sh_size); - return 0; } +#ifdef CONFIG_ARM64_MODULE_PLTS + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) + me->arch.ftrace_trampoline = (void *)s->sh_addr; +#endif } return 0; -- cgit v1.2.3-59-g8ed1b From 67ce16ec15ce9d97d3d85e72beabbc5d7017193e Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Fri, 9 Jun 2017 16:35:52 +0100 Subject: arm64: mm: print out correct page table entries When we take a fault that can't be handled, we print out the page table entries associated with the faulting address. In some cases we currently print out the wrong entries. For a faulting TTBR1 address, we sometimes print out TTBR0 table entries instead, and for a faulting TTBR0 address we sometimes print out TTBR1 table entries. 
Fix this by choosing the tables based on the faulting address. Acked-by: Mark Rutland Signed-off-by: Kristina Martsenko [will: zero-extend addrs to 64-bit, don't walk swapper w/ TTBR0 addr] Signed-off-by: Will Deacon --- arch/arm64/include/asm/system_misc.h | 2 +- arch/arm64/mm/fault.c | 36 +++++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index bc812435bc76..d0beefeb6d25 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -40,7 +40,7 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, int sig, int code, const char *name); struct mm_struct; -extern void show_pte(struct mm_struct *mm, unsigned long addr); +extern void show_pte(unsigned long addr); extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c3f2b1048f83..9d27b1720c52 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -80,18 +80,33 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) #endif /* - * Dump out the page tables associated with 'addr' in mm 'mm'. + * Dump out the page tables associated with 'addr' in the currently active mm. */ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(unsigned long addr) { + struct mm_struct *mm; pgd_t *pgd; - if (!mm) + if (addr < TASK_SIZE) { + /* TTBR0 */ + mm = current->active_mm; + if (mm == &init_mm) { + pr_alert("[%016lx] user address but active_mm is swapper\n", + addr); + return; + } + } else if (addr >= VA_START) { + /* TTBR1 */ mm = &init_mm; + } else { + pr_alert("[%016lx] address between user and kernel address ranges\n", + addr); + return; + } pr_alert("pgd = %p\n", mm->pgd); pgd = pgd_offset(mm, addr); - pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd)); + pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); do { pud_t *pud; @@ -196,8 +211,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, /* * The kernel tried to access some page that wasn't present. 
*/ -static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, struct pt_regs *regs) +static void __do_kernel_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { const char *msg; @@ -227,7 +242,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, addr); - show_pte(mm, addr); + show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); do_exit(SIGKILL); @@ -249,7 +264,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); - show_pte(tsk->mm, addr); + show_pte(addr); __show_regs(regs); } @@ -265,7 +280,6 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->active_mm; const struct fault_info *inf; /* @@ -276,7 +290,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re inf = esr_to_fault_info(esr); __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); } else - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); } #define VM_FAULT_BADMAP 0x010000 @@ -475,7 +489,7 @@ retry: return 0; no_context: - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); return 0; } -- cgit v1.2.3-59-g8ed1b From bf396c09c2447a787d02af34cf167e953f85fa42 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Fri, 9 Jun 2017 16:35:53 +0100 Subject: arm64: mm: don't print out page table entries on EL0 faults When we take a fault from EL0 that can't be handled, we print out the page table entries associated with the faulting address. This allows userspace to print out any current page table entries, including kernel (TTBR1) entries. Exposing kernel mappings like this could pose a security risk, so don't print out page table information on EL0 faults. (But still print it out for EL1 faults.) This also follows the same behaviour as x86, printing out page table entries on kernel mode faults but not user mode faults. Acked-by: Mark Rutland Signed-off-by: Kristina Martsenko Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9d27b1720c52..b5a1605398b7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -264,7 +264,6 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); - show_pte(addr); __show_regs(regs); } -- cgit v1.2.3-59-g8ed1b From 83016b204225d69516de3d57489783fafd6a0ecc Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Fri, 9 Jun 2017 16:35:54 +0100 Subject: arm64: mm: print file name of faulting vma Print out the name of the file associated with the vma that faulted. This is usually the executable or shared library name. We already print out the task name, but also printing the library name is useful for pinpointing bugs to libraries. Also print the base address and size of the vma, which together with the PC (printed by __show_regs) gives the offset into the library. 
Fault prints now look like: test[2361]: unhandled level 2 translation fault (11) at 0x00000012, esr 0x92000006, in libfoo.so[ffffa0145000+1000] This is already done on x86, for more details see commit 03252919b798 ("x86: print which shared library/executable faulted in segfault etc. messages v3"). Acked-by: Mark Rutland Signed-off-by: Kristina Martsenko Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b5a1605398b7..ad7c9c94d379 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -261,9 +261,11 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { inf = esr_to_fault_info(esr); - pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", + pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); + print_vma_addr(KERN_CONT ", in ", regs->pc); + pr_cont("\n"); __show_regs(regs); } -- cgit v1.2.3-59-g8ed1b From 1eb34b6e5160f20e1889b2551182bf4d61084d6b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 May 2017 15:23:58 +0100 Subject: arm64: fault: Print info about page table structure when dumping pte Whilst debugging a remote crash, I noticed that show_pte is unhelpful when it comes to describing the structure of the page table being walked. This is easily fixed by printing out the page table (swapper vs user), page size and virtual address size when displaying the PGD address. Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ad7c9c94d379..222427ae23d6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -104,7 +104,9 @@ void show_pte(unsigned long addr) return; } - pr_alert("pgd = %p\n", mm->pgd); + pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n", + mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, + VA_BITS, mm->pgd); pgd = pgd_offset(mm, addr); pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); -- cgit v1.2.3-59-g8ed1b From 687644209a6e95576ea453977b26dbd6248cadda Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Jun 2017 14:43:25 +0100 Subject: arm64: ftrace: fix building without CONFIG_MODULES When CONFIG_MODULES is disabled, we cannot dereference a module pointer: arch/arm64/kernel/ftrace.c: In function 'ftrace_make_call': arch/arm64/kernel/ftrace.c:107:36: error: dereferencing pointer to incomplete type 'struct module' trampoline = (unsigned long *)mod->arch.ftrace_trampoline; Also, the within_module() function is not defined: arch/arm64/kernel/ftrace.c: In function 'ftrace_make_nop': arch/arm64/kernel/ftrace.c:171:8: error: implicit declaration of function 'within_module'; did you mean 'init_module'? [-Werror=implicit-function-declaration] This addresses both problems by replacing the IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) checks with #ifdef versions.
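The distinction being exploited here: IS_ENABLED() leaves the guarded code visible to the compiler, so it must still parse and type-check even when the option is off, whereas an #ifdef block is removed by the preprocessor before the compiler ever sees it. A minimal sketch, using a hypothetical CONFIG_FOO:

	if (IS_ENABLED(CONFIG_FOO)) {
		/*
		 * Dead code when CONFIG_FOO=n, but still compiled: every
		 * symbol and type used here must exist in that config.
		 */
	}

#ifdef CONFIG_FOO
	/*
	 * Not even parsed when CONFIG_FOO=n, so this may reference
	 * things that only exist with CONFIG_FOO=y.
	 */
#endif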
Fixes: e71a4e1bebaf ("arm64: ftrace: add support for far branches to dynamic ftrace") Reported-by: Arnd Bergmann Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/ftrace.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 8a42be0693c9..401aa27808a4 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -71,11 +71,12 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - long offset = (long)pc - (long)addr; u32 old, new; - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (offset < -SZ_128M || offset >= SZ_128M)) { +#ifdef CONFIG_ARM64_MODULE_PLTS + long offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { unsigned long *trampoline; struct module *mod; @@ -121,6 +122,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } addr = (unsigned long)&trampoline[1]; } +#endif /* CONFIG_ARM64_MODULE_PLTS */ old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -135,12 +137,13 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - long offset = (long)pc - (long)addr; bool validate = true; u32 old = 0, new; - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (offset < -SZ_128M || offset >= SZ_128M)) { +#ifdef CONFIG_ARM64_MODULE_PLTS + long offset = (long)pc - (long)addr; + + if (offset < -SZ_128M || offset >= SZ_128M) { u32 replaced; /* @@ -177,6 +180,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); } +#endif /* CONFIG_ARM64_MODULE_PLTS */ new = aarch64_insn_gen_nop(); -- cgit v1.2.3-59-g8ed1b From f02ab08afbe76ee7b0b2a34a9970e7dd200d8b01 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 8 Jun 2017 18:25:26 +0100 Subject: arm64: hugetlb: Fix huge_pte_offset to return poisoned page table entries When memory failure is enabled, a poisoned hugepage pte is marked as a swap entry. huge_pte_offset() does not return the poisoned page table entries when it encounters PUD/PMD hugepages. This behaviour of huge_pte_offset() leads to errors such as the one below when munmap is called on poisoned hugepages. [ 344.165544] mm/pgtable-generic.c:33: bad pmd 000000083af00074. Fix huge_pte_offset() to return the poisoned pte, which is then appropriately handled by the generic layer code.
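A hedged sketch of what the generic layer can now do with the returned entry; is_hwpoison_entry() and pte_to_swp_entry() are existing swapops helpers, and the call site shown is illustrative rather than the actual generic-code path:

	pte_t *ptep = huge_pte_offset(mm, addr);

	if (ptep && !pte_present(*ptep)) {
		swp_entry_t entry = pte_to_swp_entry(*ptep);

		if (is_hwpoison_entry(entry)) {
			/*
			 * A poisoned hugepage is now seen here instead of
			 * tripping "bad pmd" warnings in mm/pgtable-generic.c.
			 */
		}
	}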
Signed-off-by: Punit Agrawal Acked-by: Steve Capper Reviewed-by: Catalin Marinas Cc: David Woods Tested-by: Manoj Iyer Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/mm/hugetlbpage.c | 29 ++++++++++------------------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c213fdbd056c..6eae342ced6b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) -#define pud_present(pud) (pud_val(pud)) +#define pud_present(pud) pte_present(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 7514a000e361..69b8200b1cfd 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd = NULL; - pte_t *pte = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); if (!pgd_present(*pgd)) return NULL; + pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) + if (pud_none(*pud)) return NULL; - - if (pud_huge(*pud)) + /* swap or huge page */ + if (!pud_present(*pud) || pud_huge(*pud)) return (pte_t *)pud; + /* table; check the next level */ + pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) + if (pmd_none(*pmd)) return NULL; - - if (pte_cont(pmd_pte(*pmd))) { - pmd = pmd_offset( - pud, (addr & CONT_PMD_MASK)); - return (pte_t *)pmd; - } - if (pmd_huge(*pmd)) + if (!pmd_present(*pmd) || pmd_huge(*pmd)) return (pte_t *)pmd; - pte = pte_offset_kernel(pmd, addr); - if (pte_present(*pte) && pte_cont(*pte)) { - pte = pte_offset_kernel( - pmd, (addr & CONT_PTE_MASK)); - return pte; - } + return NULL; } -- cgit v1.2.3-59-g8ed1b From e7c600f149b89e06073ab50f4f12e79828d3d2f0 Mon Sep 17 00:00:00 2001 From: "Jonathan (Zhixiong) Zhang" Date: Thu, 8 Jun 2017 18:25:27 +0100 Subject: arm64: hwpoison: add VM_FAULT_HWPOISON[_LARGE] handling Add VM_FAULT_HWPOISON[_LARGE] handling to the arm64 page fault handler. Handling of VM_FAULT_HWPOISON[_LARGE] is very similar to VM_FAULT_OOM; the only difference is that a different si_code (BUS_MCEERR_AR) is passed to user space and the si_addr_lsb field is initialized.
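From userspace, the new information surfaces through the SIGBUS siginfo. A minimal, hypothetical handler (installed with sigaction() and SA_SIGINFO; stdio is used purely for illustration and is not async-signal-safe):

#include <signal.h>
#include <stdio.h>

static void sigbus_handler(int sig, siginfo_t *si, void *ucontext)
{
	if (si->si_code == BUS_MCEERR_AR) {
		/*
		 * si_addr is the poisoned address; si_addr_lsb gives the
		 * granularity of the poison, e.g. the page shift for a
		 * small page or the hugepage shift for a large one.
		 */
		fprintf(stderr, "hwpoison at %p, lsb %d\n",
			si->si_addr, (int)si->si_addr_lsb);
	}
}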
Signed-off-by: Jonathan (Zhixiong) Zhang Signed-off-by: Tyler Baicar (fix new __do_user_fault call-site) Signed-off-by: Punit Agrawal Acked-by: Steve Capper Tested-by: Manoj Iyer Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 222427ae23d6..d73e7f1fe184 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -31,6 +31,7 @@ #include #include #include +#include <linux/hugetlb.h> #include #include @@ -256,10 +257,11 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, */ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, unsigned int esr, unsigned int sig, int code, - struct pt_regs *regs) + struct pt_regs *regs, int fault) { struct siginfo si; const struct fault_info *inf; + unsigned int lsb = 0; if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { inf = esr_to_fault_info(esr); @@ -277,6 +279,17 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, si.si_errno = 0; si.si_code = code; si.si_addr = (void __user *)addr; + /* + * Either small page or large page may be poisoned. + * In other words, VM_FAULT_HWPOISON_LARGE and + * VM_FAULT_HWPOISON are mutually exclusive. + */ + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + else if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + si.si_addr_lsb = lsb; + force_sig_info(sig, &si, tsk); } @@ -291,7 +304,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re */ if (user_mode(regs)) { inf = esr_to_fault_info(esr); - __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); + __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0); } else __do_kernel_fault(addr, esr, regs); } @@ -478,6 +491,9 @@ retry: */ sig = SIGBUS; code = BUS_ADRERR; + } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { + sig = SIGBUS; + code = BUS_MCEERR_AR; } else { /* * Something tried to access memory that isn't in our memory @@ -488,7 +504,7 @@ retry: SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, esr, sig, code, regs); + __do_user_fault(tsk, addr, esr, sig, code, regs, fault); return 0; no_context: -- cgit v1.2.3-59-g8ed1b From 0e3a9026396cd7d0eb5777ba923a8f30a3d9db19 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 8 Jun 2017 18:25:28 +0100 Subject: arm64: mm: Update perf accounting to handle poison faults Re-organise the perf accounting for fault handling in preparation for enabling handling of hardware poison faults in subsequent commits. The change updates perf accounting to be in line with the behaviour on x86.
With this update, the perf fault accounting - * Always report PERF_COUNT_SW_PAGE_FAULTS * Doesn't report anything else for VM_FAULT_ERROR (which includes hwpoison faults) * Reports PERF_COUNT_SW_PAGE_FAULTS_MAJ if it's a major fault (indicated by VM_FAULT_MAJOR) * Otherwise, reports PERF_COUNT_SW_PAGE_FAULTS_MIN Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 68 +++++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d73e7f1fe184..ea2ea68d1bd7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -359,7 +359,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { struct task_struct *tsk; struct mm_struct *mm; - int fault, sig, code; + int fault, sig, code, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -398,6 +398,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, die("Accessing user space memory outside uaccess.h routines", regs, esr); } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -421,24 +423,42 @@ retry: } fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + major |= fault & VM_FAULT_MAJOR; - /* - * If we need to retry but a fatal signal is pending, handle the - * signal first. We do not need to release the mmap_sem because it - * would already be released in __lock_page_or_retry in mm/filemap.c. - */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) - return 0; + if (fault & VM_FAULT_RETRY) { + /* + * If we need to retry but a fatal signal is pending, + * handle the signal first. We do not need to release + * the mmap_sem because it would already be released + * in __lock_page_or_retry in mm/filemap.c. + */ + if (fatal_signal_pending(current)) + return 0; + + /* + * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of + * starvation. + */ + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + up_read(&mm->mmap_sem); /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. + * Handle the "normal" (no error) case first. */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | + VM_FAULT_BADACCESS)))) { + /* + * Major/minor page fault accounting is only done + * once. If we go through a retry, it is extremely + * likely that the page will be found in page cache at + * that point. + */ + if (major) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); @@ -447,25 +467,9 @@ retry: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } - if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of - * starvation. 
- */ - mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; - mm_flags |= FAULT_FLAG_TRIED; - goto retry; - } - } - - up_read(&mm->mmap_sem); - /* - * Handle the "normal" case first - VM_FAULT_MAJOR - */ - if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) return 0; + } /* * If we are in kernel mode at this point, we have no context to -- cgit v1.2.3-59-g8ed1b From c484f2564db12fa2b01b198ecb6ff0751a3e5e32 Mon Sep 17 00:00:00 2001 From: "Jonathan (Zhixiong) Zhang" Date: Thu, 8 Jun 2017 18:25:29 +0100 Subject: arm64: kconfig: allow support for memory failure handling Declare ARCH_SUPPORTS_MEMORY_FAILURE, as arm64 does support memory failure recovery attempt. Signed-off-by: Jonathan (Zhixiong) Zhang Signed-off-by: Tyler Baicar (Dropped changes to ACPI APEI Kconfig and updated commit log) Signed-off-by: Punit Agrawal Acked-by: Steve Capper Tested-by: Manoj Iyer Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 22f769b254b4..e7fcdc9a616d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -20,6 +20,7 @@ config ARM64 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION -- cgit v1.2.3-59-g8ed1b From d5f7b828ffab28e5684913e76697e94e2442d164 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 15 Jun 2017 14:55:37 +0530 Subject: arm: perf: make of_device_ids const of_device_ids are not supposed to change at runtime. All functions working with of_device_ids provided by work with const of_device_ids. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 96b7a477a8db..8226d0b71fd3 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -552,7 +552,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -static struct of_device_id armv6_pmu_of_device_ids[] = { +static const struct of_device_id armv6_pmu_of_device_ids[] = { {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, -- cgit v1.2.3-59-g8ed1b From bddb9b68d3fb0dfbd75715bce6160c40d4dae233 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 13 Jun 2017 13:45:51 +0100 Subject: drivers/perf: commonise PERF_EVENTS dependency All PMU drivers are going to depend on PERF_EVENTS, so let's make this dependency common and simplify the individual Kconfig entries. 
Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index aa587edaf9ea..e5197ffb7422 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -3,9 +3,10 @@ # menu "Performance monitor support" + depends on PERF_EVENTS config ARM_PMU - depends on PERF_EVENTS && (ARM || ARM64) + depends on ARM || ARM64 bool "ARM PMU framework" default y help @@ -18,7 +19,7 @@ config ARM_PMU_ACPI config QCOM_L2_PMU bool "Qualcomm Technologies L2-cache PMU" - depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI + depends on ARCH_QCOM && ARM64 && ACPI help Provides support for the L2 cache performance monitor unit (PMU) in Qualcomm Technologies processors. @@ -27,7 +28,7 @@ config QCOM_L2_PMU config QCOM_L3_PMU bool "Qualcomm Technologies L3-cache PMU" - depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI + depends on ARCH_QCOM && ARM64 && ACPI select QCOM_IRQ_COMBINER help Provides support for the L3 cache performance monitor unit (PMU) @@ -36,7 +37,7 @@ config QCOM_L3_PMU monitoring L3 cache events. config XGENE_PMU - depends on PERF_EVENTS && ARCH_XGENE + depends on ARCH_XGENE bool "APM X-Gene SoC PMU" default n help -- cgit v1.2.3-59-g8ed1b From 577dfe16b852a90f26cf677cdc9113ec34199de6 Mon Sep 17 00:00:00 2001 From: Olav Haugan Date: Tue, 13 Jun 2017 13:56:14 -0700 Subject: arm64/dma-mapping: Remove extraneous null-pointer checks The current null-pointer check in __dma_alloc_coherent and __dma_free_coherent is not needed anymore since the __dma_alloc/__dma_free functions won't be called if !dev (dummy ops will be called instead). Reviewed-by: Catalin Marinas Signed-off-by: Olav Haugan Signed-off-by: Will Deacon --- arch/arm64/include/asm/dma-mapping.h | 2 -- arch/arm64/mm/dma-mapping.c | 9 --------- 2 files changed, 11 deletions(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 5392dbeffa45..f72779aad276 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -48,8 +48,6 @@ void arch_teardown_dma_ops(struct device *dev); /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { - if (!dev) - return false; return dev->archdata.dma_coherent; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3216e098c058..3e340b625436 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -95,11 +95,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return NULL; - } - if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; @@ -128,10 +123,6 @@ static void __dma_free_coherent(struct device *dev, size_t size, bool freed; phys_addr_t paddr = dma_to_phys(dev, dma_handle); - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return; - } freed = dma_release_from_contiguous(dev, phys_to_page(paddr), -- cgit v1.2.3-59-g8ed1b From c6bb8f89fa6df7a8d62bad2f66063b4b7433ea59 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 14 Jun 2017 17:37:12 +0100 Subject: ARM64/irqchip: Update ACPI_IORT symbol selection logic ACPI IORT is an ACPI addendum to describe the connection topology of devices with IOMMUs and interrupt 
controllers on ARM64 ACPI systems. Currently the ACPI IORT Kbuild symbol is selected whenever the Kbuild symbol ARM_GIC_V3_ITS is enabled, which in turn is selected by ARM64 Kbuild defaults. This makes the logic behind ACPI_IORT selection a bit twisted and not easy to follow. On ARM64 systems enabling ACPI the kbuild symbol ACPI_IORT should always be selected in that it is a kernel layer provided to the ARM64 arch code to parse and enable ACPI firmware bindings. Make the ACPI_IORT selection explicit in ARM64 Kbuild and remove the selection from ARM_GIC_V3_ITS entry, making the ACPI_IORT selection logic clearer to follow. Acked-by: Hanjun Guo Signed-off-by: Lorenzo Pieralisi Cc: Will Deacon Cc: Hanjun Guo Cc: Catalin Marinas Cc: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + drivers/irqchip/Kconfig | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e7fcdc9a616d..79c0dec93030 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -3,6 +3,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_GTDT if ACPI + select ACPI_IORT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 478f8ace2664..4a57b8f21488 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -39,7 +39,6 @@ config ARM_GIC_V3_ITS bool depends on PCI depends on PCI_MSI - select ACPI_IORT if ACPI config ARM_NVIC bool -- cgit v1.2.3-59-g8ed1b From 91e0bf81258c07aad27a4833368569ce873cd83e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 14 Jun 2017 17:37:13 +0100 Subject: ACPI/IORT: Remove iort_node_match() Commit 316ca8804ea8 ("ACPI/IORT: Remove linker section for IORT entries probing") removed the linker section for IORT entries probing. Since those IORT entries were the only iort_node_match() interface users, the iort_node_match() became obsolete and can then be removed. Remove the ACPI IORT iort_node_match() interface from the kernel. 
Acked-by: Marc Zyngier Acked-by: Hanjun Guo Signed-off-by: Lorenzo Pieralisi Cc: Hanjun Guo Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 15 --------------- include/linux/acpi_iort.h | 2 -- 2 files changed, 17 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index c5fecf97ee2f..8c77598dd6aa 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -234,21 +234,6 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type, return NULL; } -static acpi_status -iort_match_type_callback(struct acpi_iort_node *node, void *context) -{ - return AE_OK; -} - -bool iort_node_match(u8 type) -{ - struct acpi_iort_node *node; - - node = iort_scan_node(type, iort_match_type_callback, NULL); - - return node != NULL; -} - static acpi_status iort_match_node_callback(struct acpi_iort_node *node, void *context) { diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 3ff9acea8616..8379d406ad2e 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -31,7 +31,6 @@ void iort_deregister_domain_token(int trans_id); struct fwnode_handle *iort_find_domain_token(int trans_id); #ifdef CONFIG_ACPI_IORT void acpi_iort_init(void); -bool iort_node_match(u8 type); u32 iort_msi_map_rid(struct device *dev, u32 req_id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); void acpi_configure_pmsi_domain(struct device *dev); @@ -41,7 +40,6 @@ void iort_set_dma_mask(struct device *dev); const struct iommu_ops *iort_iommu_configure(struct device *dev); #else static inline void acpi_iort_init(void) { } -static inline bool iort_node_match(u8 type) { return false; } static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) { return req_id; } static inline struct irq_domain *iort_get_device_domain(struct device *dev, -- cgit v1.2.3-59-g8ed1b From e27c7fa015d61c8be6a2c32b2144aad2ae6ec975 Mon Sep 17 00:00:00 2001 From: Dustin Brown Date: Tue, 13 Jun 2017 11:40:56 -0700 Subject: arm64: Export save_stack_trace_tsk() The kernel watchdog is a great debugging tool for finding tasks that consume a disproportionate amount of CPU time in contiguous chunks. One can imagine building a similar watchdog for arbitrary driver threads using save_stack_trace_tsk() and print_stack_trace(). However, this is not viable for dynamically loaded driver modules on ARM platforms because save_stack_trace_tsk() is not exported for those architectures. Export save_stack_trace_tsk() for the ARM64 architecture to align with x86 and support various debugging use cases such as arbitrary driver thread watchdog timers. Signed-off-by: Dustin Brown Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index feac80c22f61..09d37d66b630 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -210,6 +210,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) put_task_stack(tsk); } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); void save_stack_trace(struct stack_trace *trace) { -- cgit v1.2.3-59-g8ed1b From 06c35ef1fdf8d955684448683f7e48ac5f15ccfd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 20 Jun 2017 08:59:00 +0200 Subject: drivers/char: kmem: disable on arm64 As it turns out, arm64 deviates from other architectures in the way it maps the VMALLOC region: on most (all?) 
other architectures, it resides strictly above the kernel's direct mapping of DRAM, but on arm64, this is the other way around. For instance, for a 48-bit VA configuration, we have

  modules : 0xffff000000000000 - 0xffff000008000000   (   128 MB)
  vmalloc : 0xffff000008000000 - 0xffff7dffbfff0000   (129022 GB)
  ...
  vmemmap : 0xffff7e0000000000 - 0xffff800000000000   (  2048 GB maximum)
            0xffff7e0000000000 - 0xffff7e0003ff0000   (    63 MB actual)
  memory  : 0xffff800000000000 - 0xffff8000ffc00000   (  4092 MB)

This has mostly gone unnoticed until now, but it does appear that it breaks an assumption in the kmem read/write code, which does something like

  if (p < (unsigned long) high_memory) {
    ... use straight copy_[to|from]_user() using p as virtual address ...
  }
  ...
  if (count > 0) {
    ... use vread/vwrite for accesses past high_memory ...
  }

The first condition will inadvertently hold for the VMALLOC region if VMALLOC_START < PAGE_OFFSET [which is the case on arm64], but the read or write will subsequently fail the virt_addr_valid() check, resulting in a -ENXIO return value. Given how kmem seems to be living in borrowed time anyway, and given the fact that nobody noticed that the read/write interface is broken on arm64 in the first place, let's not bother trying to fix it, but simply disable the /dev/kmem interface entirely for arm64. Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/char/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 31adbebf812e..8102ee7b3247 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -17,6 +17,8 @@ config DEVMEM config DEVKMEM bool "/dev/kmem virtual device support" + # On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write + depends on !ARM64 help Say Y here if you want to support the /dev/kmem device. The /dev/kmem device is rarely used, but can be used for certain -- cgit v1.2.3-59-g8ed1b From 737326aa510b5f7d2f38ded739914a9d5e4e4cea Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Jun 2017 12:43:54 +0200 Subject: fs/proc: kcore: use kcore_list type to check for vmalloc/module address Instead of passing each start address into is_vmalloc_or_module_addr() to decide whether it falls into either the VMALLOC or the MODULES region, we can simply check the type field of the current kcore_list entry, since it will be set to KCORE_VMALLOC based on exactly the same conditions. As a bonus, when reading the KCORE_TEXT region on architectures that have one, this will avoid using vread() on the region if it happens to intersect with a KCORE_VMALLOC region. This is due to the fact that the KCORE_TEXT region is the first one to be added to the kcore region list.
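Both this kcore change and the /dev/kmem change above stem from the same address-space inversion. A minimal user-space sketch (not kernel code; the constants are taken from the 48-bit VA memory map quoted earlier) shows how a vmalloc address slips past an "is it below the end of the linear map?" test of the kind kmem used:

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          uint64_t vmalloc_start = 0xffff000008000000ULL; /* VMALLOC_START */
          uint64_t high_memory   = 0xffff8000ffc00000ULL; /* end of linear map */
          uint64_t p = vmalloc_start + 0x1000;            /* a vmalloc address */

          /* kmem's fast path treats anything below high_memory as linear-mapped */
          if (p < high_memory)
                  printf("0x%016llx takes the linear-map path, but "
                         "virt_addr_valid() then fails -> -ENXIO\n",
                         (unsigned long long)p);
          return 0;
  }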
Reported-by: Tan Xiaojun Tested-by: Tan Xiaojun Tested-by: Mark Rutland Acked-by: Mark Rutland Reviewed-by: Laura Abbott Reviewed-by: Jiri Olsa Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- fs/proc/kcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4ee55274f155..45629f4b5402 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -504,7 +504,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (&m->list == &kclist_head) { if (clear_user(buffer, tsz)) return -EFAULT; - } else if (is_vmalloc_or_module_addr((void *)start)) { + } else if (m->type == KCORE_VMALLOC) { vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ if (copy_to_user(buffer, buf, tsz)) -- cgit v1.2.3-59-g8ed1b From 8f36094802e4e6de180b36bcac4cfd9d319e1b64 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Jun 2017 12:43:55 +0200 Subject: arm64: mm: select CONFIG_ARCH_PROC_KCORE_TEXT To avoid issues with the /proc/kcore code getting confused about the kernel's block mappings in the VMALLOC region, enable the existing facility that describes the [_text, _end) interval as a separate KCORE_TEXT region, which supersedes the KCORE_VMALLOC region that it intersects with on arm64. Reported-by: Tan Xiaojun Tested-by: Tan Xiaojun Tested-by: Mark Rutland Acked-by: Mark Rutland Reviewed-by: Laura Abbott Reviewed-by: Jiri Olsa Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 79c0dec93030..b37ac9b2a36b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -246,6 +246,9 @@ config PGTABLE_LEVELS config ARCH_SUPPORTS_UPROBES def_bool y +config ARCH_PROC_KCORE_TEXT + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3-59-g8ed1b From 20987de3c2c45c314e0386f724aa85f55d984ef2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:38 +0100 Subject: arm64: signal: split frame link record from sigcontext structure In order to be able to increase the amount of data currently written to the __reserved[] array in the signal frame, it is necessary to overwrite the locations currently occupied by the {fp,lr} frame link record pushed at the top of the signal stack. In order for this to work, this patch detaches the frame link record from struct rt_sigframe and places it separately at the top of the signal stack. This will allow subsequent patches to insert data between it and __reserved[]. This change relies on the non-ABI status of the placement of the frame record with respect to struct sigframe: this status is undocumented, but the placement is not declared or described in the user headers, and known unwinder implementations (libgcc, libunwind, gdb) appear not to rely on it.
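For reference, the resulting signal stack looks roughly like this (a sketch of one reading of the diff below, not an ABI statement):

  higher addresses
  +-----------------------+ <- sigsp(regs->sp, ksig)
  | struct frame_record   |    { fp; lr; } -- regs->regs[29] points at fp
  +-----------------------+
  | struct rt_sigframe    |    siginfo, ucontext, __reserved[]
  +-----------------------+ <- regs->sp == user->sigframe (16-byte aligned)
  lower addresses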
Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 50 +++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c7b6de62f9d3..1e5ed3be78ed 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -41,10 +42,18 @@ struct rt_sigframe { struct siginfo info; struct ucontext uc; +}; + +struct frame_record { u64 fp; u64 lr; }; +struct rt_sigframe_user_layout { + struct rt_sigframe __user *sigframe; + struct frame_record __user *next_frame; +}; + static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; @@ -162,16 +171,17 @@ badframe: return 0; } -static int setup_sigframe(struct rt_sigframe __user *sf, +static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { int i, err = 0; + struct rt_sigframe __user *sf = user->sigframe; void *aux = sf->uc.uc_mcontext.__reserved; struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ - __put_user_error(regs->regs[29], &sf->fp, err); - __put_user_error(regs->regs[30], &sf->lr, err); + __put_user_error(regs->regs[29], &user->next_frame->fp, err); + __put_user_error(regs->regs[30], &user->next_frame->lr, err); for (i = 0; i < 31; i++) __put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], @@ -209,34 +219,36 @@ static int setup_sigframe(struct rt_sigframe __user *sf, return err; } -static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig, - struct pt_regs *regs) +static int get_sigframe(struct rt_sigframe_user_layout *user, + struct ksignal *ksig, struct pt_regs *regs) { unsigned long sp, sp_top; - struct rt_sigframe __user *frame; sp = sp_top = sigsp(regs->sp, ksig); - sp = (sp - sizeof(struct rt_sigframe)) & ~15; - frame = (struct rt_sigframe __user *)sp; + sp = round_down(sp - sizeof(struct frame_record), 16); + user->next_frame = (struct frame_record __user *)sp; + + sp = round_down(sp - sizeof(struct rt_sigframe), 16); + user->sigframe = (struct rt_sigframe __user *)sp; /* * Check that we can actually write to the signal frame. 
*/ - if (!access_ok(VERIFY_WRITE, frame, sp_top - sp)) - frame = NULL; + if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp)) + return -EFAULT; - return frame; + return 0; } static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, - void __user *frame, int usig) + struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; regs->regs[0] = usig; - regs->sp = (unsigned long)frame; - regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp); + regs->sp = (unsigned long)user->sigframe; + regs->regs[29] = (unsigned long)&user->next_frame->fp; regs->pc = (unsigned long)ka->sa.sa_handler; if (ka->sa.sa_flags & SA_RESTORER) @@ -250,20 +262,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; int err = 0; - frame = get_sigframe(ksig, regs); - if (!frame) + if (get_sigframe(&user, ksig, regs)) return 1; + frame = user.sigframe; + __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigframe(frame, regs, set); + err |= setup_sigframe(&user, regs, set); if (err == 0) { - setup_return(regs, &ksig->ka, frame, usig); + setup_return(regs, &ksig->ka, &user, usig); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, &ksig->info); regs->regs[1] = (unsigned long)&frame->info; -- cgit v1.2.3-59-g8ed1b From 47ccb02868cead34578d953b5fe0cdd58394605e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:39 +0100 Subject: arm64: signal: Refactor sigcontext parsing in rt_sigreturn Currently, rt_sigreturn does very limited checking on the sigcontext coming from userspace. Future additions to the sigcontext data will increase the potential for surprises. Also, it is not clear whether the sigcontext extension records are supposed to occur in a particular order. To allow the parsing code to be extended more easily, this patch factors out the sigcontext parsing into a separate function, and adds extra checks to validate the well-formedness of the sigcontext structure. Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 86 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 1e5ed3be78ed..67769f68ae06 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -101,12 +102,86 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) return err ? 
-EFAULT : 0; } +struct user_ctxs { + struct fpsimd_context __user *fpsimd; +}; + +static int parse_user_sigframe(struct user_ctxs *user, + struct rt_sigframe __user *sf) +{ + struct sigcontext __user *const sc = &sf->uc.uc_mcontext; + struct _aarch64_ctx __user *head = + (struct _aarch64_ctx __user *)&sc->__reserved; + size_t offset = 0; + + user->fpsimd = NULL; + + while (1) { + int err; + u32 magic, size; + + head = (struct _aarch64_ctx __user *)&sc->__reserved[offset]; + if (!IS_ALIGNED((unsigned long)head, 16)) + goto invalid; + + err = 0; + __get_user_error(magic, &head->magic, err); + __get_user_error(size, &head->size, err); + if (err) + return err; + + switch (magic) { + case 0: + if (size) + goto invalid; + + goto done; + + case FPSIMD_MAGIC: + if (user->fpsimd) + goto invalid; + + if (offset > sizeof(sc->__reserved) - + sizeof(*user->fpsimd) || + size < sizeof(*user->fpsimd)) + goto invalid; + + user->fpsimd = (struct fpsimd_context __user *)head; + break; + + case ESR_MAGIC: + /* ignore */ + break; + + default: + goto invalid; + } + + if (size < sizeof(*head)) + goto invalid; + + if (size > sizeof(sc->__reserved) - (sizeof(*head) + offset)) + goto invalid; + + offset += size; + } + +done: + if (!user->fpsimd) + goto invalid; + + return 0; + +invalid: + return -EINVAL; +} + static int restore_sigframe(struct pt_regs *regs, struct rt_sigframe __user *sf) { sigset_t set; int i, err; - void *aux = sf->uc.uc_mcontext.__reserved; + struct user_ctxs user; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -125,12 +200,11 @@ static int restore_sigframe(struct pt_regs *regs, regs->syscallno = ~0UL; err |= !valid_user_regs(®s->user_regs, current); + if (err == 0) + err = parse_user_sigframe(&user, sf); - if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); - err |= restore_fpsimd_context(fpsimd_ctx); - } + if (err == 0) + err = restore_fpsimd_context(user.fpsimd); return err; } -- cgit v1.2.3-59-g8ed1b From bb4891a6c3551f27fa4d548b87a66c258bdb100b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:40 +0100 Subject: arm64: signal: factor frame layout and population into separate passes In preparation for expanding the signal frame, this patch refactors the signal frame setup code in setup_sigframe() into two separate passes. The first pass, setup_sigframe_layout(), determines the size of the signal frame and its internal layout, including the presence and location of optional records. The resulting knowledge is used to allocate and locate the user stack space required for the signal frame and to determine which optional records to include. The second pass, setup_sigframe(), is called once the stack frame is allocated in order to populate it with the necessary context information. As a result of these changes, it becomes more natural to represent locations in the signal frame by a base pointer and an offset, since the absolute address of each location is not known during the layout pass. To be more consistent with this logic, parse_user_sigframe() is refactored to describe signal frame locations in a similar way. This change has no effect on the signal ABI, but will make it easier to expand the signal frame in future patches. 
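The shape of the two passes, reduced to a standalone C model (the record sizes and structure names here are illustrative only, not the kernel's):

  #include <stddef.h>
  #include <stdio.h>

  struct layout {
          size_t size;       /* running total of bytes allocated so far */
          size_t fpsimd_off; /* mandatory record */
          size_t esr_off;    /* optional record; 0 means absent */
  };

  /* Pass 1: decide which records exist and where each will live. */
  static void layout_pass(struct layout *l, int have_fault_code)
  {
          l->size = 0;
          l->fpsimd_off = l->size;
          l->size += 0x210;          /* fpsimd_context */
          l->esr_off = 0;
          if (have_fault_code) {
                  l->esr_off = l->size;
                  l->size += 0x10;   /* esr_context */
          }
          l->size += 0x10;           /* terminator */
  }

  /* Pass 2: with the frame allocated, write each record at base + offset. */
  static void populate_pass(char *base, const struct layout *l)
  {
          base[l->fpsimd_off] = 1;   /* stand-in for the real record data */
          if (l->esr_off)
                  base[l->esr_off] = 2;
  }

  int main(void)
  {
          struct layout l;
          char frame[0x1000] = { 0 };

          layout_pass(&l, 1);
          populate_pass(frame, &l);
          printf("frame needs 0x%zx bytes\n", l.size);
          return 0;
  }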
Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 111 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 23 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 67769f68ae06..eaef530579f8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -53,8 +54,39 @@ struct frame_record { struct rt_sigframe_user_layout { struct rt_sigframe __user *sigframe; struct frame_record __user *next_frame; + + unsigned long size; /* size of allocated sigframe data */ + unsigned long limit; /* largest allowed size */ + + unsigned long fpsimd_offset; + unsigned long esr_offset; + unsigned long end_offset; }; +static void init_user_layout(struct rt_sigframe_user_layout *user) +{ + memset(user, 0, sizeof(*user)); + user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved); + + user->limit = user->size + + sizeof(user->sigframe->uc.uc_mcontext.__reserved) - + round_up(sizeof(struct _aarch64_ctx), 16); + /* ^ reserve space for terminator */ +} + +static size_t sigframe_size(struct rt_sigframe_user_layout const *user) +{ + return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); +} + +static void __user *apply_user_offset( + struct rt_sigframe_user_layout const *user, unsigned long offset) +{ + char __user *base = (char __user *)user->sigframe; + + return base + offset; +} + static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; @@ -110,26 +142,35 @@ static int parse_user_sigframe(struct user_ctxs *user, struct rt_sigframe __user *sf) { struct sigcontext __user *const sc = &sf->uc.uc_mcontext; - struct _aarch64_ctx __user *head = - (struct _aarch64_ctx __user *)&sc->__reserved; + struct _aarch64_ctx __user *head; + char __user *base = (char __user *)&sc->__reserved; size_t offset = 0; + size_t limit = sizeof(sc->__reserved); user->fpsimd = NULL; + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + while (1) { - int err; + int err = 0; u32 magic, size; - head = (struct _aarch64_ctx __user *)&sc->__reserved[offset]; - if (!IS_ALIGNED((unsigned long)head, 16)) + if (limit - offset < sizeof(*head)) goto invalid; - err = 0; + if (!IS_ALIGNED(offset, 16)) + goto invalid; + + head = (struct _aarch64_ctx __user *)(base + offset); __get_user_error(magic, &head->magic, err); __get_user_error(size, &head->size, err); if (err) return err; + if (limit - offset < size) + goto invalid; + switch (magic) { case 0: if (size) @@ -141,9 +182,7 @@ static int parse_user_sigframe(struct user_ctxs *user, if (user->fpsimd) goto invalid; - if (offset > sizeof(sc->__reserved) - - sizeof(*user->fpsimd) || - size < sizeof(*user->fpsimd)) + if (size < sizeof(*user->fpsimd)) goto invalid; user->fpsimd = (struct fpsimd_context __user *)head; @@ -160,7 +199,7 @@ static int parse_user_sigframe(struct user_ctxs *user, if (size < sizeof(*head)) goto invalid; - if (size > sizeof(sc->__reserved) - (sizeof(*head) + offset)) + if (limit - offset < size) goto invalid; offset += size; @@ -245,13 +284,30 @@ badframe: return 0; } +/* Determine the layout of optional records in the signal frame */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +{ + user->fpsimd_offset = user->size; + user->size += round_up(sizeof(struct fpsimd_context), 16); + + /* fault information, if valid */ + if (current->thread.fault_code) { + 
user->esr_offset = user->size; + user->size += round_up(sizeof(struct esr_context), 16); + } + + /* set the "end" magic */ + user->end_offset = user->size; + + return 0; +} + + static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { int i, err = 0; struct rt_sigframe __user *sf = user->sigframe; - void *aux = sf->uc.uc_mcontext.__reserved; - struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ __put_user_error(regs->regs[29], &user->next_frame->fp, err); @@ -269,26 +325,29 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); + struct fpsimd_context __user *fpsimd_ctx = + apply_user_offset(user, user->fpsimd_offset); err |= preserve_fpsimd_context(fpsimd_ctx); - aux += sizeof(*fpsimd_ctx); } /* fault information, if valid */ - if (current->thread.fault_code) { - struct esr_context *esr_ctx = - container_of(aux, struct esr_context, head); + if (err == 0 && user->esr_offset) { + struct esr_context __user *esr_ctx = + apply_user_offset(user, user->esr_offset); + __put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); __put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); - aux += sizeof(*esr_ctx); } /* set the "end" magic */ - end = aux; - __put_user_error(0, &end->magic, err); - __put_user_error(0, &end->size, err); + if (err == 0) { + struct _aarch64_ctx __user *end = + apply_user_offset(user, user->end_offset); + + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); + } return err; } @@ -297,13 +356,19 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, struct ksignal *ksig, struct pt_regs *regs) { unsigned long sp, sp_top; + int err; + + init_user_layout(user); + err = setup_sigframe_layout(user); + if (err) + return err; sp = sp_top = sigsp(regs->sp, ksig); sp = round_down(sp - sizeof(struct frame_record), 16); user->next_frame = (struct frame_record __user *)sp; - sp = round_down(sp - sizeof(struct rt_sigframe), 16); + sp = round_down(sp, 16) - sigframe_size(user); user->sigframe = (struct rt_sigframe __user *)sp; /* -- cgit v1.2.3-59-g8ed1b From bb4322f74340de578bc61ed0cfb9690ddeb9ef76 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:41 +0100 Subject: arm64: signal: factor out signal frame record allocation This patch factors out the allocator for signal frame optional records into a separate function, to ensure consistency and facilitate later expansion. No overrun checking is currently done, because the allocation is in user memory and anyway the kernel never tries to allocate enough space in the signal frame yet for an overrun to occur. This behaviour will be refined in future patches. The approach taken in this patch to allocation of the terminator record is not very clean: this will also be replaced in subsequent patches. For future extension, a comment is added in sigcontext.h documenting the current static allocations in __reserved[]. This will be important for determining under what circumstances userspace may or may not see an expanded signal frame. 
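The allocator's 16-byte padding behaviour can be checked in isolation; a standalone model of sigframe_alloc() (using the record sizes from the __reserved[] table below) produces offsets matching that table:

  #include <stdio.h>

  static void alloc(unsigned long *size, unsigned long *offset, unsigned long req)
  {
          unsigned long padded = (req + 15UL) & ~15UL; /* round_up(req, 16) */

          *offset = *size;
          *size += padded;
  }

  int main(void)
  {
          unsigned long size = 0, fpsimd_off, esr_off, end_off;

          alloc(&size, &fpsimd_off, 0x210); /* fpsimd_context */
          alloc(&size, &esr_off, 0x10);     /* esr_context */
          alloc(&size, &end_off, 0x10);     /* terminator */
          printf("fpsimd@0x%lx esr@0x%lx end@0x%lx, total 0x%lx\n",
                 fpsimd_off, esr_off, end_off, size);
          return 0;
  }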
Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/sigcontext.h | 19 ++++++++++++++ arch/arm64/kernel/signal.c | 43 ++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index ee469be1ae1d..1328a2c14371 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -33,6 +33,25 @@ struct sigcontext { __u8 __reserved[4096] __attribute__((__aligned__(16))); }; +/* + * Allocation of __reserved[]: + * (Note: records do not necessarily occur in the order shown here.) + * + * size description + * + * 0x210 fpsimd_context + * 0x10 esr_context + * 0x10 terminator (null _aarch64_ctx) + * + * 0xdd0 (reserved for future allocation) + * + * New records that can exceed this space need to be opt-in for userspace, so + * that an expanded signal frame is not generated unexpectedly. The mechanism + * for opting in will depend on the extension that generates each new record. + * The above table documents the maximum set and sizes of records that can be + * generated when userspace does not opt in for any such extension. + */ + /* * Header to be used at the beginning of structures extending the user * context. Such structures must be placed after the rt_sigframe on the stack diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index eaef530579f8..fa787e6ac7c2 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -79,6 +79,22 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); } +/* + * Allocate space for an optional record of <size> bytes in the user + * signal frame. The offset from the signal frame base address to the + * allocated block is assigned to *offset. + */ +static int sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size) +{ + size_t padded_size = round_up(size, 16); + + *offset = user->size; + user->size += padded_size; + + return 0; +} + static void __user *apply_user_offset( struct rt_sigframe_user_layout const *user, unsigned long offset) { @@ -287,19 +303,32 @@ badframe: /* Determine the layout of optional records in the signal frame */ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) { - user->fpsimd_offset = user->size; - user->size += round_up(sizeof(struct fpsimd_context), 16); + int err; + + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; /* fault information, if valid */ if (current->thread.fault_code) { - user->esr_offset = user->size; - user->size += round_up(sizeof(struct esr_context), 16); + err = sigframe_alloc(user, &user->esr_offset, + sizeof(struct esr_context)); + if (err) + return err; } - /* set the "end" magic */ - user->end_offset = user->size; + /* + * Allocate space for the terminator record. + * HACK: here we undo the reservation of space for the end record. + * This bodge should be replaced with a cleaner approach later on.
+ */ + user->limit = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved) + + sizeof(user->sigframe->uc.uc_mcontext.__reserved); - return 0; + err = sigframe_alloc(user, &user->end_offset, + sizeof(struct _aarch64_ctx)); + return err; } -- cgit v1.2.3-59-g8ed1b From f5d284900c0f960e318a063f4c40826b6e3aa6a8 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 20 Jun 2017 14:24:43 +0200 Subject: arm64: pass machine size to sparse When using sparse on the arm64 tree we get many thousands of warnings like 'constant ... is so big it is unsigned long long' or 'shift too big (32) for type unsigned long'. This happens because by default sparse considers the machine as 32bit and defines the size of the types accordingly. Fix this by passing the '-m64' flag to sparse so that sparse can correctly define longs as being 64bit. CC: Catalin Marinas CC: Will Deacon CC: linux-arm-kernel@lists.infradead.org Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 1ce57b42f390..fdb0133142ff 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -62,7 +62,7 @@ LD += -EL UTS_MACHINE := aarch64 endif -CHECKFLAGS += -D__aarch64__ +CHECKFLAGS += -D__aarch64__ -m64 ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large -- cgit v1.2.3-59-g8ed1b From 42aa560446622da6c33dce54fc8f4cd81516e906 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:04 -0600 Subject: acpi: apei: read ack upon ghes record consumption A RAS (Reliability, Availability, Serviceability) controller may be a separate processor running in parallel with OS execution, and may generate error records for consumption by the OS. If the RAS controller produces multiple error records, then they may be overwritten before the OS has consumed them. The Generic Hardware Error Source (GHES) v2 structure introduces the capability for the OS to acknowledge the consumption of the error record generated by the RAS controller. A RAS controller supporting GHESv2 shall wait for the acknowledgment before writing a new error record, thus eliminating the race condition. Add support for parsing of GHESv2 sub-tables as well. 
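The acknowledgment itself is a read-modify-write of the read-ack register, as implemented by ghes_ack_error() in the diff below. In isolation (the register values here are invented), the update is:

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          uint64_t val      = 0xff; /* current read-ack register contents */
          uint64_t preserve = 0x0;  /* gv2->read_ack_preserve */
          uint64_t write    = 0x1;  /* gv2->read_ack_write */
          unsigned int off  = 0;    /* gv2->read_ack_register.bit_offset */

          val &= preserve << off;   /* keep only the bits firmware wants kept */
          val |= write << off;      /* signal "record consumed" */
          printf("value written back: 0x%llx\n", (unsigned long long)val);
          return 0;
  }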
Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--- drivers/acpi/apei/hest.c | 7 ++++-- include/acpi/ghes.h | 5 +++- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 980515e029fa..866d244c6311 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -80,6 +81,11 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +static inline bool is_hest_type_generic_v2(struct ghes *ghes) +{ + return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; +} + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -240,6 +246,16 @@ static int ghes_estatus_pool_expand(unsigned long len) return 0; } +static int map_gen_v2(struct ghes *ghes) +{ + return apei_map_generic_address(&ghes->generic_v2->read_ack_register); +} + +static void unmap_gen_v2(struct ghes *ghes) +{ + apei_unmap_generic_address(&ghes->generic_v2->read_ack_register); +} + static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -249,10 +265,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); if (!ghes) return ERR_PTR(-ENOMEM); + ghes->generic = generic; + if (is_hest_type_generic_v2(ghes)) { + rc = map_gen_v2(ghes); + if (rc) + goto err_free; + } + rc = apei_map_generic_address(&generic->error_status_address); if (rc) - goto err_free; + goto err_unmap_read_ack_addr; error_block_length = generic->error_block_length; if (error_block_length > GHES_ESTATUS_MAX_SIZE) { pr_warning(FW_WARN GHES_PFX @@ -264,13 +287,16 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); if (!ghes->estatus) { rc = -ENOMEM; - goto err_unmap; + goto err_unmap_status_addr; } return ghes; -err_unmap: +err_unmap_status_addr: apei_unmap_generic_address(&generic->error_status_address); +err_unmap_read_ack_addr: + if (is_hest_type_generic_v2(ghes)) + unmap_gen_v2(ghes); err_free: kfree(ghes); return ERR_PTR(rc); @@ -280,6 +306,8 @@ static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); apei_unmap_generic_address(&ghes->generic->error_status_address); + if (is_hest_type_generic_v2(ghes)) + unmap_gen_v2(ghes); } static inline int ghes_severity(int severity) @@ -649,6 +677,21 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } +static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &gv2->read_ack_register); + if (rc) + return rc; + + val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset; + val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset; + + return apei_write(val, &gv2->read_ack_register); +} + static int ghes_proc(struct ghes *ghes) { int rc; @@ -661,6 +704,16 @@ static int ghes_proc(struct ghes *ghes) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } ghes_do_proc(ghes, ghes->estatus); + + /* + * GHESv2 type HEST entries introduce support for error acknowledgment, + * so only acknowledge the error if this support is present. 
+ */ + if (is_hest_type_generic_v2(ghes)) { + rc = ghes_ack_error(ghes->generic_v2); + if (rc) + return rc; + } out: ghes_clear_estatus(ghes); return rc; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 8f2a98e23bba..456b488eb1df 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), + [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), }; static int hest_esrc_len(struct acpi_hest_header *hest_hdr) @@ -141,7 +142,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void { int *count = data; - if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR) + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR || + hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) (*count)++; return 0; } @@ -152,7 +154,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) struct ghes_arr *ghes_arr = data; int rc, i; - if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) return 0; if (!((struct acpi_hest_generic *)hest_hdr)->enabled) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 720446cb243e..68f088a92398 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -13,7 +13,10 @@ #define GHES_EXITING 0x0002 struct ghes { - struct acpi_hest_generic *generic; + union { + struct acpi_hest_generic *generic; + struct acpi_hest_generic_v2 *generic_v2; + }; struct acpi_hest_generic_status *estatus; u64 buffer_paddr; unsigned long flags; -- cgit v1.2.3-59-g8ed1b From bbcc2e7b642ed241651fee50ac6ed59643cb1736 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:05 -0600 Subject: ras: acpi/apei: cper: add support for generic data v3 structure The ACPI 6.1 spec adds a new revision of the generic error data entry structure. Add support to handle the new structure as well as properly verify and iterate through the generic data entries. 
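With the new accessors in place, a consumer walks the status block as in the fragment below (this assumes an estatus pointer is in scope; the acpi_hest_get_*() helpers are the ones added to include/acpi/ghes.h further down in this patch):

  struct acpi_hest_generic_data *gdata =
          (struct acpi_hest_generic_data *)(estatus + 1);
  unsigned int data_len = estatus->data_length;

  while (data_len >= acpi_hest_get_size(gdata)) {
          void *payload = acpi_hest_get_payload(gdata); /* skips v300 header */

          /* ... handle one section's payload here ... */
          data_len -= acpi_hest_get_record_size(gdata);
          gdata = acpi_hest_get_next(gdata);
  }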
Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 11 +++++------ drivers/firmware/efi/cper.c | 37 ++++++++++++++++++++++--------------- include/acpi/ghes.h | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 866d244c6311..81e06e3cb08f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -428,8 +428,7 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int unsigned long pfn; int flags = -1; int sec_sev = ghes_severity(gdata->error_severity); - struct cper_sec_mem_err *mem_err; - mem_err = (struct cper_sec_mem_err *)(gdata + 1); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) return; @@ -466,8 +465,8 @@ static void ghes_do_proc(struct ghes *ghes, sec_type = (guid_t *)gdata->section_type; sec_sev = ghes_severity(gdata->error_severity); if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { - struct cper_sec_mem_err *mem_err; - mem_err = (struct cper_sec_mem_err *)(gdata+1); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + ghes_edac_report_mem_error(ghes, sev, mem_err); arch_apei_report_mem_error(sev, mem_err); @@ -475,8 +474,8 @@ static void ghes_do_proc(struct ghes *ghes, } #ifdef CONFIG_ACPI_APEI_PCIEAER else if (guid_equal(sec_type, &CPER_SEC_PCIE)) { - struct cper_sec_pcie *pcie_err; - pcie_err = (struct cper_sec_pcie *)(gdata+1); + struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata); + if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE && pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID && diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index d42537425438..902475704311 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -32,6 +32,7 @@ #include #include #include +#include #define INDENT_SP " " @@ -386,8 +387,9 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, pfx, pcie->bridge.secondary_status, pcie->bridge.control); } -static void cper_estatus_print_section( - const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no) +static void +cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, + int sec_no) { uuid_le *sec_type = (uuid_le *)gdata->section_type; __u16 severity; @@ -403,14 +405,16 @@ static void cper_estatus_print_section( snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) { - struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1); + struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata); + printk("%s""section_type: general processor error\n", newpfx); if (gdata->error_data_length >= sizeof(*proc_err)) cper_print_proc_generic(newpfx, proc_err); else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { - struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); + struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); + printk("%s""section_type: memory error\n", newpfx); if (gdata->error_data_length >= sizeof(struct cper_sec_mem_err_old)) @@ -419,7 +423,8 @@ static void cper_estatus_print_section( else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { - struct cper_sec_pcie *pcie = (void *)(gdata + 1); + struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata); + 
printk("%s""section_type: PCIe error\n", newpfx); if (gdata->error_data_length >= sizeof(*pcie)) cper_print_pcie(newpfx, pcie, gdata); @@ -438,7 +443,7 @@ void cper_estatus_print(const char *pfx, const struct acpi_hest_generic_status *estatus) { struct acpi_hest_generic_data *gdata; - unsigned int data_len, gedata_len; + unsigned int data_len; int sec_no = 0; char newpfx[64]; __u16 severity; @@ -452,11 +457,11 @@ void cper_estatus_print(const char *pfx, data_len = estatus->data_length; gdata = (struct acpi_hest_generic_data *)(estatus + 1); snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); - while (data_len >= sizeof(*gdata)) { - gedata_len = gdata->error_data_length; + + while (data_len >= acpi_hest_get_size(gdata)) { cper_estatus_print_section(newpfx, gdata, sec_no); - data_len -= gedata_len + sizeof(*gdata); - gdata = (void *)(gdata + 1) + gedata_len; + data_len -= acpi_hest_get_record_size(gdata); + gdata = acpi_hest_get_next(gdata); sec_no++; } } @@ -486,12 +491,14 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus) return rc; data_len = estatus->data_length; gdata = (struct acpi_hest_generic_data *)(estatus + 1); - while (data_len >= sizeof(*gdata)) { - gedata_len = gdata->error_data_length; - if (gedata_len > data_len - sizeof(*gdata)) + + while (data_len >= acpi_hest_get_size(gdata)) { + gedata_len = acpi_hest_get_error_length(gdata); + if (gedata_len > data_len - acpi_hest_get_size(gdata)) return -EINVAL; - data_len -= gedata_len + sizeof(*gdata); - gdata = (void *)(gdata + 1) + gedata_len; + + data_len -= acpi_hest_get_record_size(gdata); + gdata = acpi_hest_get_next(gdata); } if (data_len) return -EINVAL; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 68f088a92398..8f8fea004df5 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -73,3 +73,39 @@ static inline void ghes_edac_unregister(struct ghes *ghes) { } #endif + +static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata) +{ + return gdata->revision >> 8; +} + +static inline void *acpi_hest_get_payload(struct acpi_hest_generic_data *gdata) +{ + if (acpi_hest_get_version(gdata) >= 3) + return (void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1); + + return gdata + 1; +} + +static inline int acpi_hest_get_error_length(struct acpi_hest_generic_data *gdata) +{ + return ((struct acpi_hest_generic_data *)(gdata))->error_data_length; +} + +static inline int acpi_hest_get_size(struct acpi_hest_generic_data *gdata) +{ + if (acpi_hest_get_version(gdata) >= 3) + return sizeof(struct acpi_hest_generic_data_v300); + + return sizeof(struct acpi_hest_generic_data); +} + +static inline int acpi_hest_get_record_size(struct acpi_hest_generic_data *gdata) +{ + return (acpi_hest_get_size(gdata) + acpi_hest_get_error_length(gdata)); +} + +static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) +{ + return (void *)(gdata) + acpi_hest_get_record_size(gdata); +} -- cgit v1.2.3-59-g8ed1b From 8a94471fb73fd53589746561a2dba29e073ed829 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:06 -0600 Subject: cper: add timestamp print to CPER status printing The ACPI 6.1 spec added a timestamp to the generic error data entry structure. Print the timestamp out when printing out the error information. 
Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Signed-off-by: Will Deacon --- drivers/firmware/efi/cper.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 902475704311..229cf9277524 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include #include #define INDENT_SP " " @@ -387,6 +389,27 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, pfx, pcie->bridge.secondary_status, pcie->bridge.control); } +static void cper_print_tstamp(const char *pfx, + struct acpi_hest_generic_data_v300 *gdata) +{ + __u8 hour, min, sec, day, mon, year, century, *timestamp; + + if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) { + timestamp = (__u8 *)&(gdata->time_stamp); + sec = bcd2bin(timestamp[0]); + min = bcd2bin(timestamp[1]); + hour = bcd2bin(timestamp[2]); + day = bcd2bin(timestamp[4]); + mon = bcd2bin(timestamp[5]); + year = bcd2bin(timestamp[6]); + century = bcd2bin(timestamp[7]); + + printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx, + (timestamp[3] & 0x1 ? "precise " : "imprecise "), + century, year, mon, day, hour, min, sec); + } +} + static void cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no) @@ -395,6 +418,9 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata __u16 severity; char newpfx[64]; + if (acpi_hest_get_version(gdata) >= 3) + cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata); + severity = gdata->error_severity; printk("%s""Error %d, type: %s\n", pfx, sec_no, cper_severity_str(severity)); -- cgit v1.2.3-59-g8ed1b From 2f74f09bce4f8d0236f20174a6daae63e10fe733 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:07 -0600 Subject: efi: parse ARM processor error Add support for ARM Common Platform Error Record (CPER). UEFI 2.6 specification adds support for ARM specific processor error information to be reported as part of the CPER records. This provides more detail for processor error logs. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/firmware/efi/cper.c | 129 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/cper.h | 54 +++++++++++++++++++ 2 files changed, 183 insertions(+) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 229cf9277524..eac08548d233 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -110,12 +110,15 @@ void cper_print_bits(const char *pfx, unsigned int bits, static const char * const proc_type_strs[] = { "IA32/X64", "IA64", + "ARM", }; static const char * const proc_isa_strs[] = { "IA32", "IA64", "X64", + "ARM A32/T32", + "ARM A64", }; static const char * const proc_error_type_strs[] = { @@ -184,6 +187,122 @@ static void cper_print_proc_generic(const char *pfx, printk("%s""IP: 0x%016llx\n", pfx, proc->ip); } +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) +static const char * const arm_reg_ctx_strs[] = { + "AArch32 general purpose registers", + "AArch32 EL1 context registers", + "AArch32 EL2 context registers", + "AArch32 secure context registers", + "AArch64 general purpose registers", + "AArch64 EL1 context registers", + "AArch64 EL2 context registers", + "AArch64 EL3 context registers", + "Misc.
system register structure", +}; + +static void cper_print_proc_arm(const char *pfx, + const struct cper_sec_proc_arm *proc) +{ + int i, len, max_ctx_type; + struct cper_arm_err_info *err_info; + struct cper_arm_ctx_info *ctx_info; + char newpfx[64]; + + printk("%sMIDR: 0x%016llx\n", pfx, proc->midr); + + len = proc->section_length - (sizeof(*proc) + + proc->err_info_num * (sizeof(*err_info))); + if (len < 0) { + printk("%ssection length: %d\n", pfx, proc->section_length); + printk("%ssection length is too small\n", pfx); + printk("%sfirmware-generated error record is incorrect\n", pfx); + printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num); + return; + } + + if (proc->validation_bits & CPER_ARM_VALID_MPIDR) + printk("%sMultiprocessor Affinity Register (MPIDR): 0x%016llx\n", + pfx, proc->mpidr); + + if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL) + printk("%serror affinity level: %d\n", pfx, + proc->affinity_level); + + if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) { + printk("%srunning state: 0x%x\n", pfx, proc->running_state); + printk("%sPower State Coordination Interface state: %d\n", + pfx, proc->psci_state); + } + + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); + + err_info = (struct cper_arm_err_info *)(proc + 1); + for (i = 0; i < proc->err_info_num; i++) { + printk("%sError info structure %d:\n", pfx, i); + + printk("%snum errors: %d\n", pfx, err_info->multiple_error + 1); + + if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) { + if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST) + printk("%sfirst error captured\n", newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST) + printk("%slast error captured\n", newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED) + printk("%spropagated error captured\n", + newpfx); + if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW) + printk("%soverflow occurred, error info is incomplete\n", + newpfx); + } + + printk("%serror_type: %d, %s\n", newpfx, err_info->type, + err_info->type < ARRAY_SIZE(proc_error_type_strs) ? 
+ proc_error_type_strs[err_info->type] : "unknown"); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) + printk("%serror_info: 0x%016llx\n", newpfx, + err_info->error_info); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR) + printk("%svirtual fault address: 0x%016llx\n", + newpfx, err_info->virt_fault_addr); + if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR) + printk("%sphysical fault address: 0x%016llx\n", + newpfx, err_info->physical_fault_addr); + err_info += 1; + } + + ctx_info = (struct cper_arm_ctx_info *)err_info; + max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1; + for (i = 0; i < proc->context_info_num; i++) { + int size = sizeof(*ctx_info) + ctx_info->size; + + printk("%sContext info structure %d:\n", pfx, i); + if (len < size) { + printk("%ssection length is too small\n", newpfx); + printk("%sfirmware-generated error record is incorrect\n", pfx); + return; + } + if (ctx_info->type > max_ctx_type) { + printk("%sInvalid context type: %d (max: %d)\n", + newpfx, ctx_info->type, max_ctx_type); + return; + } + printk("%sregister context type: %s\n", newpfx, + arm_reg_ctx_strs[ctx_info->type]); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, + (ctx_info + 1), ctx_info->size, 0); + len -= size; + ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size); + } + + if (len > 0) { + printk("%sVendor specific error info has %u bytes:\n", pfx, + len); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info, + len, true); + } +} +#endif + static const char * const mem_err_type_strs[] = { "unknown", "no error", @@ -456,6 +575,16 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata cper_print_pcie(newpfx, pcie, gdata); else goto err_section_too_small; +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM) + } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) { + struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata); + + printk("%ssection_type: ARM processor error\n", newpfx); + if (gdata->error_data_length >= sizeof(*arm_err)) + cper_print_proc_arm(newpfx, arm_err); + else + goto err_section_too_small; +#endif } else printk("%s""section type: unknown, %pUl\n", newpfx, sec_type); diff --git a/include/linux/cper.h b/include/linux/cper.h index dcacb1a72e26..4c671fc2081e 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -180,6 +180,10 @@ enum { #define CPER_SEC_PROC_IPF \ UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \ 0x80, 0xC7, 0x3C, 0x88, 0x81) +/* Processor Specific: ARM */ +#define CPER_SEC_PROC_ARM \ + UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05, \ + 0x1D, 0x5D, 0x46, 0xB0) /* Platform Memory */ #define CPER_SEC_PLATFORM_MEM \ UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ @@ -255,6 +259,22 @@ enum { #define CPER_PCIE_SLOT_SHIFT 3 +#define CPER_ARM_VALID_MPIDR BIT(0) +#define CPER_ARM_VALID_AFFINITY_LEVEL BIT(1) +#define CPER_ARM_VALID_RUNNING_STATE BIT(2) +#define CPER_ARM_VALID_VENDOR_INFO BIT(3) + +#define CPER_ARM_INFO_VALID_MULTI_ERR BIT(0) +#define CPER_ARM_INFO_VALID_FLAGS BIT(1) +#define CPER_ARM_INFO_VALID_ERR_INFO BIT(2) +#define CPER_ARM_INFO_VALID_VIRT_ADDR BIT(3) +#define CPER_ARM_INFO_VALID_PHYSICAL_ADDR BIT(4) + +#define CPER_ARM_INFO_FLAGS_FIRST BIT(0) +#define CPER_ARM_INFO_FLAGS_LAST BIT(1) +#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2) +#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3) + /* * All tables and structs must be byte-packed to match CPER * specification, since the tables are provided by the system BIOS 
@@ -340,6 +360,40 @@ struct cper_ia_proc_ctx { __u64 mm_reg_addr; }; +/* ARM Processor Error Section */ +struct cper_sec_proc_arm { + __u32 validation_bits; + __u16 err_info_num; /* Number of Processor Error Info */ + __u16 context_info_num; /* Number of Processor Context Info Records*/ + __u32 section_length; + __u8 affinity_level; + __u8 reserved[3]; /* must be zero */ + __u64 mpidr; + __u64 midr; + __u32 running_state; /* Bit 0 set - Processor running. PSCI = 0 */ + __u32 psci_state; +}; + +/* ARM Processor Error Information Structure */ +struct cper_arm_err_info { + __u8 version; + __u8 length; + __u16 validation_bits; + __u8 type; + __u16 multiple_error; + __u8 flags; + __u64 error_info; + __u64 virt_fault_addr; + __u64 physical_fault_addr; +}; + +/* ARM Processor Context Information Structure */ +struct cper_arm_ctx_info { + __u16 version; + __u16 type; + __u32 size; +}; + /* Old Memory Error Section UEFI 2.1, 2.2 */ struct cper_sec_mem_err_old { __u64 validation_bits; -- cgit v1.2.3-59-g8ed1b From af66b2d88a76574d55e81d712292abd34beb6178 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 21 Jun 2017 16:00:42 +0100 Subject: arm64: ptrace: Fix VFP register dumping in compat coredumps Currently, VFP registers are omitted from coredumps for compat processes, due to a bug in the REGSET_COMPAT_VFP regset implementation. compat_vfp_get() needs to transfer non-contiguous data from thread_struct.fpsimd_state, and uses put_user() to handle the offending trailing word (FPSCR). This fails when copying to a kernel address (i.e., kbuf && !ubuf), which is what happens when dumping core. As a result, the ELF coredump core code silently omits the NT_ARM_VFP note from the dump. It would be possible to work around this with additional special case code for the put_user(), but since user_regset_copyout() is explicitly designed to handle this scenario it is cleaner to port the put_user() to a user_regset_copyout() call, which this patch does. Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c142459a88f3..0e5aaec5b751 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -894,7 +894,7 @@ static int compat_vfp_get(struct task_struct *target, { struct user_fpsimd_state *uregs; compat_ulong_t fpscr; - int ret; + int ret, vregs_end_pos; uregs = &target->thread.fpsimd_state.user_fpsimd; @@ -902,13 +902,16 @@ static int compat_vfp_get(struct task_struct *target, * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. 
*/ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, - VFP_STATE_SIZE - sizeof(compat_ulong_t)); + vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, + 0, vregs_end_pos); if (count && !ret) { fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | (uregs->fpcr & VFP_FPSCR_CTRL_MASK); - ret = put_user(fpscr, (compat_ulong_t *)ubuf); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr, + vregs_end_pos, VFP_STATE_SIZE); } return ret; -- cgit v1.2.3-59-g8ed1b From e1d5a8fb73e6c65280c21ec188180345649ee650 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 21 Jun 2017 16:00:43 +0100 Subject: arm64: ptrace: Flush FPSIMD regs back to thread_struct before reading When reading the FPSIMD state of current (which occurs when dumping core), it is possible that userspace has modified the FPSIMD registers since the time the task was last scheduled out. Such changes are not guaranteed to be reflected immediately in thread_struct. As a result, a coredump can contain stale values for these registers. Reading the registers of a stopped task via ptrace is unaffected. This patch explicitly flushes the CPU state back to thread_struct before dumping when operating on current, thus ensuring that coredump contents are up to date. Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 0e5aaec5b751..eeef01a219a6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -623,6 +623,10 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, { struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + + if (target == current) + fpsimd_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); } @@ -898,6 +902,9 @@ static int compat_vfp_get(struct task_struct *target, uregs = &target->thread.fpsimd_state.user_fpsimd; + + if (target == current) + fpsimd_preserve_current_state(); + /* * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. -- cgit v1.2.3-59-g8ed1b From 936eb65ca22ad856cb3a995e8cd742e982dc2dd0 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 21 Jun 2017 16:00:44 +0100 Subject: arm64: ptrace: Flush user-RW TLS reg to thread_struct before reading When reading current's user-writable TLS register (which occurs when dumping core for native tasks), it is possible that userspace has modified it since the time the task was last scheduled out. The new TLS register value is not guaranteed to have been written immediately back to thread_struct in this case. As a result, a coredump can capture stale data for this register. Reading the register for a stopped task via ptrace is unaffected. For native tasks, this patch explicitly flushes the TPIDR_EL0 register back to thread_struct before dumping when operating on current, thus ensuring that coredump contents are up to date. For compat tasks, the TLS register is not user-writable and so cannot be out of sync, so no flush is required in compat_tls_get().
Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 3 +++ arch/arm64/kernel/process.c | 8 ++++++-- arch/arm64/kernel/ptrace.c | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9428b93fefb2..64c9e78f9882 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -104,6 +104,9 @@ struct thread_struct { #define task_user_tls(t) (&(t)->thread.tp_value) #endif +/* Sync TPIDR_EL0 back to thread_struct for current */ +void tls_preserve_current_state(void); + #define INIT_THREAD { } static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index af1ea258c212..659ae8094ed5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -298,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, return 0; } +void tls_preserve_current_state(void) +{ + *task_user_tls(current) = read_sysreg(tpidr_el0); +} + static void tls_thread_switch(struct task_struct *next) { unsigned long tpidr, tpidrro; - tpidr = read_sysreg(tpidr_el0); - *task_user_tls(current) = tpidr; + tls_preserve_current_state(); tpidr = *task_user_tls(next); tpidrro = is_compat_thread(task_thread_info(next)) ? diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index eeef01a219a6..35846f1550db 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -652,6 +652,10 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { unsigned long *tls = &target->thread.tp_value; + + if (target == current) + tls_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); } -- cgit v1.2.3-59-g8ed1b From 8effeaaf2cacc8a8007d3089e253e7baaff57bb7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 21 Jun 2017 18:11:23 +0100 Subject: arm64: dump cpu_hwcaps at panic time When debugging a kernel panic(), it can be useful to know which CPU features have been detected by the kernel, as some code paths can depend on these (and may have been patched at runtime). This patch adds a notifier to dump the detected CPU caps (as a hex string) at panic(), when we log other information useful for debugging. On a Juno R1 system running v4.12-rc5, this looks like: [ 615.431249] Kernel panic - not syncing: Fatal exception in interrupt [ 615.437609] SMP: stopping secondary CPUs [ 615.441872] Kernel Offset: disabled [ 615.445372] CPU features: 0x02086 [ 615.448522] Memory Limit: none A developer can decode this by looking at the corresponding bits. 
For example, the above decodes as: * bit 1: ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE * bit 2: ARM64_WORKAROUND_845719 * bit 7: ARM64_WORKAROUND_834220 * bit 13: ARM64_HAS_32BIT_EL0 Signed-off-by: Mark Rutland Acked-by: Steve Capper Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 55d5c72a507d..9f9e0064c8c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -51,6 +51,25 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcaps); +static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) +{ + /* file-wide pr_fmt adds "CPU features: " prefix */ + pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); + return 0; +} + +static struct notifier_block cpu_hwcaps_notifier = { + .notifier_call = dump_cpu_hwcaps +}; + +static int __init register_cpu_hwcaps_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &cpu_hwcaps_notifier); + return 0; +} +__initcall(register_cpu_hwcaps_dumper); + DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); -- cgit v1.2.3-59-g8ed1b From 32015c235603a209697d1228667b1fb1cc8a8d06 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:08 -0600 Subject: arm64: exception: handle Synchronous External Abort SEA exceptions are often caused by an uncorrected hardware error, and are handled when data abort and instruction abort exception classes have specific values for their Fault Status Code. When SEA occurs, before killing the process, report the error in the kernel logs. Update fault_info[] with specific SEA faults so that the new SEA handler is used. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Acked-by: Catalin Marinas [will: use NULL instead of 0 when assigning si_addr] Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/mm/fault.c | 45 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 85997c0e5443..28bf02efce76 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -83,6 +83,7 @@ #define ESR_ELx_WNR (UL(1) << 6) /* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_FnV (UL(1) << 10) #define ESR_ELx_EA (UL(1) << 9) #define ESR_ELx_S1PTW (UL(1) << 7) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 37b95dff0b07..246e64f60610 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -522,6 +522,31 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } +/* + * This abort handler deals with Synchronous External Abort. + * It calls notifiers, and then returns "fault". 
+ */ +static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + struct siginfo info; + const struct fault_info *inf; + + inf = esr_to_fault_info(esr); + pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, addr); + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = 0; + if (esr & ESR_ELx_FnV) + info.si_addr = NULL; + else + info.si_addr = (void __user *)addr; + arm64_notify_die("", regs, &info, esr); + + return 0; +} + static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "ttbr address size fault" }, { do_bad, SIGBUS, 0, "level 1 address size fault" }, @@ -539,22 +564,22 @@ static const struct fault_info fault_info[] = { { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, - { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_sea, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "unknown 17" }, { do_bad, SIGBUS, 0, "unknown 18" }, { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_sea, SIGBUS, 0, "level 0 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 1 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 2 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 3 (translation table walk)" }, + { do_sea, SIGBUS, 0, "synchronous parity or ECC error" }, { do_bad, SIGBUS, 0, "unknown 25" }, { do_bad, SIGBUS, 0, "unknown 26" }, { do_bad, SIGBUS, 0, "unknown 27" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" }, { do_bad, SIGBUS, 0, "unknown 32" }, { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, { do_bad, SIGBUS, 0, "unknown 34" }, -- cgit v1.2.3-59-g8ed1b From 7edda0886bc3d1e5418951558a2555af1bc73b0a Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:09 -0600 Subject: acpi: apei: handle SEA notification type for ARMv8 ARM APEI extension proposal added SEA (Synchronous External Abort) notification type for ARMv8. Add a new GHES error source handling function for SEA. If an error source's notification type is SEA, then this function can be registered into the SEA exception handler. That way GHES will parse and report SEA exceptions when they occur. An SEA can interrupt code that had interrupts masked and is treated as an NMI. 
To aid this the page of address space for mapping APEI buffers while in_nmi() is always reserved, and ghes_ioremap_pfn_nmi() is changed to use the helper methods to find the prot_t to map with in the same way as ghes_ioremap_pfn_irq(). Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 ++ arch/arm64/mm/fault.c | 17 ++++++++++++ drivers/acpi/apei/Kconfig | 15 ++++++++++ drivers/acpi/apei/ghes.c | 70 +++++++++++++++++++++++++++++++++++++++++++---- include/acpi/ghes.h | 7 +++++ 5 files changed, 105 insertions(+), 6 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..80559977a83e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -19,6 +19,7 @@ config ARM64 select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING @@ -92,6 +93,7 @@ config ARM64 select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP if NUMA + select HAVE_NMI if ACPI_APEI_SEA select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_REGS diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 246e64f60610..07481af15fd7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -42,6 +42,8 @@ #include #include +#include + struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -535,6 +537,21 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr); + /* + * Synchronous aborts may interrupt code which had interrupts masked. + * Before calling out into the wider kernel tell the interested + * subsystems. + */ + if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { + if (interrupts_enabled(regs)) + nmi_enter(); + + ghes_notify_sea(); + + if (interrupts_enabled(regs)) + nmi_exit(); + } + info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = 0; diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index b0140c8fc733..de14d49a5c90 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -39,6 +39,21 @@ config ACPI_APEI_PCIEAER PCIe AER errors may be reported via APEI firmware first mode. Turn on this option to enable the corresponding support. +config ACPI_APEI_SEA + bool "APEI Synchronous External Abort logging/recovering support" + depends on ARM64 && ACPI_APEI_GHES + default y + help + This option should be enabled if the system supports + firmware first handling of SEA (Synchronous External Abort). + SEA happens with certain faults of data abort or instruction + abort synchronous exceptions on ARMv8 systems. If a system + supports firmware first handling of SEA, the platform analyzes + and handles hardware error notifications from SEA, and it may then + form a HW error record for the OS to parse and handle. This + option allows the OS to look for such hardware error record, and + take appropriate action. 
+ config ACPI_APEI_MEMORY_FAILURE bool "APEI memory error recovering support" depends on ACPI_APEI && MEMORY_FAILURE diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 81e06e3cb08f..42ddc0eff25b 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -116,11 +116,7 @@ static DEFINE_MUTEX(ghes_list_mutex); * Two virtual pages are used, one for IRQ/PROCESS context, the other for * NMI context (optionally). */ -#ifdef CONFIG_HAVE_ACPI_APEI_NMI #define GHES_IOREMAP_PAGES 2 -#else -#define GHES_IOREMAP_PAGES 1 -#endif #define GHES_IOREMAP_IRQ_PAGE(base) (base) #define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE) @@ -159,10 +155,14 @@ static void ghes_ioremap_exit(void) static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn) { unsigned long vaddr; + phys_addr_t paddr; + pgprot_t prot; vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr); - ioremap_page_range(vaddr, vaddr + PAGE_SIZE, - pfn << PAGE_SHIFT, PAGE_KERNEL); + + paddr = pfn << PAGE_SHIFT; + prot = arch_apei_get_mem_attribute(paddr); + ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot); return (void __iomem *)vaddr; } @@ -774,6 +774,50 @@ static struct notifier_block ghes_notifier_sci = { .notifier_call = ghes_notify_sci, }; +#ifdef CONFIG_ACPI_APEI_SEA +static LIST_HEAD(ghes_sea); + +void ghes_notify_sea(void) +{ + struct ghes *ghes; + + /* + * synchronize_rcu() will wait for nmi_exit(), so no need to + * rcu_read_lock(). + */ + list_for_each_entry_rcu(ghes, &ghes_sea, list) { + ghes_proc(ghes); + } +} + +static void ghes_sea_add(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_add_rcu(&ghes->list, &ghes_sea); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_sea_remove(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + mutex_unlock(&ghes_list_mutex); + synchronize_rcu(); +} +#else /* CONFIG_ACPI_APEI_SEA */ +static inline void ghes_sea_add(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", + ghes->generic->header.source_id); +} + +static inline void ghes_sea_remove(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", + ghes->generic->header.source_id); +} +#endif /* CONFIG_ACPI_APEI_SEA */ + #ifdef CONFIG_HAVE_ACPI_APEI_NMI /* * printk is not safe in NMI context. 
So in NMI handler, we allocate @@ -1019,6 +1063,14 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_EXTERNAL: case ACPI_HEST_NOTIFY_SCI: break; + case ACPI_HEST_NOTIFY_SEA: + if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", + generic->header.source_id); + rc = -ENOTSUPP; + goto err; + } + break; case ACPI_HEST_NOTIFY_NMI: if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", @@ -1083,6 +1135,9 @@ static int ghes_probe(struct platform_device *ghes_dev) list_add_rcu(&ghes->list, &ghes_sci); mutex_unlock(&ghes_list_mutex); break; + case ACPI_HEST_NOTIFY_SEA: + ghes_sea_add(ghes); + break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_add(ghes); break; @@ -1126,6 +1181,9 @@ static int ghes_remove(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); synchronize_rcu(); break; + case ACPI_HEST_NOTIFY_SEA: + ghes_sea_remove(ghes); + break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_remove(ghes); break; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 8f8fea004df5..95305aeb13ff 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -1,3 +1,6 @@ +#ifndef GHES_H +#define GHES_H + #include #include @@ -109,3 +112,7 @@ static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) { return (void *)(gdata) + acpi_hest_get_record_size(gdata); } + +void ghes_notify_sea(void); + +#endif /* GHES_H */ -- cgit v1.2.3-59-g8ed1b From 2fb5853e4442334cb66fc2ab33a51c91d4434769 Mon Sep 17 00:00:00 2001 From: "Jonathan (Zhixiong) Zhang" Date: Wed, 21 Jun 2017 12:17:10 -0600 Subject: acpi: apei: panic OS with fatal error status block Even if an error status block's severity is fatal, the kernel does not honor the severity level and does not panic. With the firmware first model, the platform could inform the OS about a fatal hardware error through the non-NMI GHES notification type. The OS should panic when a hardware error record is received with this severity. If the severity is fatal, call panic() after the CPER data in the error status block is printed, before each error section is handled. Signed-off-by: Jonathan (Zhixiong) Zhang Signed-off-by: Tyler Baicar Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 42ddc0eff25b..7a91ac7d6b75 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -135,6 +135,8 @@ static unsigned long ghes_estatus_pool_size_request; static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; +static int ghes_panic_timeout __read_mostly = 30; + static int ghes_ioremap_init(void) { ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES, @@ -691,6 +693,16 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2) return apei_write(val, &gv2->read_ack_register); } +static void __ghes_panic(struct ghes *ghes) +{ + __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); + + /* reboot to log the error!
*/ + if (!panic_timeout) + panic_timeout = ghes_panic_timeout; + panic("Fatal hardware error!"); +} + static int ghes_proc(struct ghes *ghes) { int rc; @@ -698,6 +710,11 @@ static int ghes_proc(struct ghes *ghes) rc = ghes_read_estatus(ghes, 0); if (rc) goto out; + + if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) { + __ghes_panic(ghes); + } + if (!ghes_estatus_cached(ghes->estatus)) { if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) ghes_estatus_cache_add(ghes->generic, ghes->estatus); @@ -838,8 +855,6 @@ static atomic_t ghes_in_nmi = ATOMIC_INIT(0); static LIST_HEAD(ghes_nmi); -static int ghes_panic_timeout __read_mostly = 30; - static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next; @@ -925,18 +940,6 @@ static void __process_error(struct ghes *ghes) #endif } -static void __ghes_panic(struct ghes *ghes) -{ - oops_begin(); - ghes_print_queued_estatus(); - __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus); - - /* reboot to log the error! */ - if (panic_timeout == 0) - panic_timeout = ghes_panic_timeout; - panic("Fatal hardware error!"); -} - static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) { struct ghes *ghes; @@ -954,8 +957,11 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) } sev = ghes_severity(ghes->estatus->error_severity); - if (sev >= GHES_SEV_PANIC) + if (sev >= GHES_SEV_PANIC) { + oops_begin(); + ghes_print_queued_estatus(); __ghes_panic(ghes); + } if (!(ghes->flags & GHES_TO_CLEAR)) continue; -- cgit v1.2.3-59-g8ed1b From 0fc300f414519b10c146fc3329a1b3094e4b6d52 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:11 -0600 Subject: efi: print unrecognized CPER section The UEFI spec allows for non-standard sections in the Common Platform Error Record. This is defined in section N.2.3 of UEFI version 2.5. Currently, if the CPER section's type (UUID) does not match one of the section types that the kernel knows how to parse, the section is skipped. Therefore, the user is not able to see such CPER data, for instance the error record of a non-standard section. This change prints out the raw data in hex in the dmesg buffer so that non-standard sections are reported to the user. Non-standard section type errors should be reported to the user because these can include errors which are vendor-specific. The data length is taken from the Error Data Length field of the Generic Error Data Entry. The following is a sample output from dmesg: Hardware error from APEI Generic Hardware Error Source: 2 It has been corrected by h/w and requires no further action event severity: corrected time: precise 2017-03-15 20:37:35 Error 0, type: corrected section type: unknown, d2e2621c-f936-468d-0d84-15a4ed015c8b section length: 0x238 00000000: 4d415201 4d492031 453a4d45 435f4343 .RAM1 IMEM:ECC_C 00000010: 53515f45 44525f42 00000000 00000000 E_QSB_RD........ 00000020: 00000000 00000000 00000000 00000000 ................ 00000030: 00000000 00000000 01010000 01010000 ................ 00000040: 00000000 00000000 00000005 00000000 ................ 00000050: 01010000 00000000 00000001 00dddd00 ................ ... The raw data from the error can then be decoded using vendor-specific tools.
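[Editor's note: a vendor tool consuming the dump above only needs to recognise the section type GUID and reassemble the payload bytes. A minimal sketch of the GUID match, assuming the EFI mixed-endian GUID encoding (the first three fields are stored little-endian); the tool itself is hypothetical:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* d2e2621c-f936-468d-0d84-15a4ed015c8b from the sample output above */
static const uint8_t vendor_ram_err_guid[16] = {
	0x1c, 0x62, 0xe2, 0xd2, 0x36, 0xf9, 0x8d, 0x46,
	0x0d, 0x84, 0x15, 0xa4, 0xed, 0x01, 0x5c, 0x8b
};

static bool is_vendor_ram_section(const uint8_t sec_type[16])
{
	/* Binary compare against the vendor's known section type. */
	return memcmp(sec_type, vendor_ram_err_guid, 16) == 0;
}
]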
Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Signed-off-by: Will Deacon --- drivers/firmware/efi/cper.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index eac08548d233..d5a5855906d6 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -585,8 +585,15 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata else goto err_section_too_small; #endif - } else - printk("%s""section type: unknown, %pUl\n", newpfx, sec_type); + } else { + const void *err = acpi_hest_get_payload(gdata); + + printk("%ssection type: unknown, %pUl\n", newpfx, sec_type); + printk("%ssection length: %#x\n", newpfx, + gdata->error_data_length); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err, + gdata->error_data_length, true); + } return; -- cgit v1.2.3-59-g8ed1b From 297b64c74385fc7ea5dfff66105ab6465f2df49a Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:12 -0600 Subject: ras: acpi / apei: generate trace event for unrecognized CPER section The UEFI spec includes non-standard section type support in the Common Platform Error Record. This is defined in section N.2.3 of UEFI version 2.5. Currently if the CPER section's type (UUID) does not match any section type that the kernel knows how to parse, a trace event is not generated. Generate a trace event which contains the raw error data for non-standard section type error records. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Tested-by: Shiju Jose Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 18 ++++++++++++++++++ drivers/ras/ras.c | 10 +++++++++- include/linux/ras.h | 12 ++++++++++++ include/linux/uuid.h | 4 +++- include/ras/ras_event.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 87 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7a91ac7d6b75..ab36ad628c68 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -45,11 +45,14 @@ #include #include #include +#include +#include #include #include #include #include +#include #include "apei-internal.h" @@ -461,11 +464,19 @@ static void ghes_do_proc(struct ghes *ghes, int sev, sec_sev; struct acpi_hest_generic_data *gdata; guid_t *sec_type; + guid_t *fru_id = &NULL_UUID_LE; + char *fru_text = ""; sev = ghes_severity(estatus->error_severity); apei_estatus_for_each_section(estatus, gdata) { sec_type = (guid_t *)gdata->section_type; sec_sev = ghes_severity(gdata->error_severity); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (guid_t *)gdata->fru_id; + + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata); @@ -506,6 +517,13 @@ static void ghes_do_proc(struct ghes *ghes, } #endif + else { + void *err = acpi_hest_get_payload(gdata); + + log_non_standard_event(sec_type, fru_id, fru_text, + sec_sev, err, + gdata->error_data_length); + } } } diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 94f8038864b4..e87fd9e32ee2 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -7,11 +7,19 @@ #include #include +#include #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras #include +void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id, + const char *fru_text, const u8 sev, const u8 *err, + const u32 len) 
+{ + trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len); +} + static int __init ras_init(void) { int rc = 0; @@ -27,7 +35,7 @@ subsys_initcall(ras_init); EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); #endif EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); - +EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event); int __init parse_ras_param(char *str) { diff --git a/include/linux/ras.h b/include/linux/ras.h index ffb147185e8d..62fac3042dce 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -2,6 +2,7 @@ #define __RAS_H__ #include +#include #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); @@ -22,4 +23,15 @@ static inline void __init cec_init(void) { } static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif +#ifdef CONFIG_RAS +void log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, const u32 len); +#else +static void log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, + const u32 len) { return; } +#endif + #endif /* __RAS_H__ */ diff --git a/include/linux/uuid.h b/include/linux/uuid.h index 75f7182d5360..61641faca38b 100644 --- a/include/linux/uuid.h +++ b/include/linux/uuid.h @@ -18,8 +18,10 @@ #include +#define UUID_SIZE 16 + typedef struct { - __u8 b[16]; + __u8 b[UUID_SIZE]; } uuid_t; #define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 1791a12cfa85..4f79ba94fa6b 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -161,6 +161,51 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * Non-Standard Section Report + * + * This event is generated when hardware detected a hardware + * error event, which may be of non-standard section as defined + * in UEFI spec appendix "Common Platform Error Record", or may + * be of sections for which TRACE_EVENT is not defined. + * + */ +TRACE_EVENT(non_standard_event, + + TP_PROTO(const uuid_le *sec_type, + const uuid_le *fru_id, + const char *fru_text, + const u8 sev, + const u8 *err, + const u32 len), + + TP_ARGS(sec_type, fru_id, fru_text, sev, err, len), + + TP_STRUCT__entry( + __array(char, sec_type, UUID_SIZE) + __array(char, fru_id, UUID_SIZE) + __string(fru_text, fru_text) + __field(u8, sev) + __field(u32, len) + __dynamic_array(u8, buf, len) + ), + + TP_fast_assign( + memcpy(__entry->sec_type, sec_type, UUID_SIZE); + memcpy(__entry->fru_id, fru_id, UUID_SIZE); + __assign_str(fru_text, fru_text); + __entry->sev = sev; + __entry->len = len; + memcpy(__get_dynamic_array(buf), err, len); + ), + + TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s", + __entry->sev, __entry->sec_type, + __entry->fru_id, __get_str(fru_text), + __entry->len, + __print_hex(__get_dynamic_array(buf), __entry->len)) +); + /* * PCIe AER Trace event * -- cgit v1.2.3-59-g8ed1b From e9279e83ad1f4b5af541a522a81888f828210b40 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:13 -0600 Subject: trace, ras: add ARM processor error trace event Currently there are trace events for the various RAS errors with the exception of ARM processor type errors. Add a new trace event for such errors so that the user will know when they occur. These trace events are consistent with the ARM processor error section type defined in UEFI 2.6 spec section N.2.4.4. 
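[Editor's note: to sketch how the pieces fit together — the wrapper function below is illustrative, but it mirrors what ghes_do_proc() does in this series once a section's type matches CPER_SEC_PROC_ARM: the decoded section is handed straight to the new helper, which fires the tracepoint:

#include <acpi/ghes.h>
#include <linux/cper.h>
#include <linux/ras.h>

static void example_report_arm_section(struct acpi_hest_generic_data *gdata)
{
	/* The ARM processor error section sits in the entry's payload. */
	struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);

	/* Fires trace_arm_event(); fields are gated by validation_bits. */
	log_arm_hw_error(err);
}
]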
Signed-off-by: Tyler Baicar Acked-by: Steven Rostedt Reviewed-by: Xie XiuQi Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 6 +++++- drivers/firmware/efi/cper.c | 1 + drivers/ras/ras.c | 6 ++++++ include/linux/ras.h | 3 +++ include/ras/ras_event.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index ab36ad628c68..5073d035bb6e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -517,7 +517,11 @@ static void ghes_do_proc(struct ghes *ghes, } #endif - else { + else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { + struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata); + + log_arm_hw_error(err); + } else { void *err = acpi_hest_get_payload(gdata); log_non_standard_event(sec_type, fru_id, fru_text, diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index d5a5855906d6..48a8f69da42a 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -35,6 +35,7 @@ #include #include #include +#include #define INDENT_SP " " diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index e87fd9e32ee2..39701a5c3f49 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -20,6 +20,11 @@ void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id, trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len); } +void log_arm_hw_error(struct cper_sec_proc_arm *err) +{ + trace_arm_event(err); +} + static int __init ras_init(void) { int rc = 0; @@ -36,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); #endif EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event); +EXPORT_TRACEPOINT_SYMBOL_GPL(arm_event); int __init parse_ras_param(char *str) { diff --git a/include/linux/ras.h b/include/linux/ras.h index 62fac3042dce..7d61863ff265 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -3,6 +3,7 @@ #include #include +#include #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); @@ -27,11 +28,13 @@ static inline int cec_add_elem(u64 pfn) { return -ENODEV; } void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); +void log_arm_hw_error(struct cper_sec_proc_arm *err); #else static void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len) { return; } +static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif #endif /* __RAS_H__ */ diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 4f79ba94fa6b..429f46fb61e4 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -161,6 +161,51 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * ARM Processor Events Report + * + * This event is generated when hardware detects an ARM processor error + * has occurred. UEFI 2.6 spec section N.2.4.4. 
+ */ +TRACE_EVENT(arm_event, + + TP_PROTO(const struct cper_sec_proc_arm *proc), + + TP_ARGS(proc), + + TP_STRUCT__entry( + __field(u64, mpidr) + __field(u64, midr) + __field(u32, running_state) + __field(u32, psci_state) + __field(u8, affinity) + ), + + TP_fast_assign( + if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL) + __entry->affinity = proc->affinity_level; + else + __entry->affinity = ~0; + if (proc->validation_bits & CPER_ARM_VALID_MPIDR) + __entry->mpidr = proc->mpidr; + else + __entry->mpidr = 0ULL; + __entry->midr = proc->midr; + if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) { + __entry->running_state = proc->running_state; + __entry->psci_state = proc->psci_state; + } else { + __entry->running_state = ~0; + __entry->psci_state = ~0; + } + ), + + TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " + "running state: %d; PSCI state: %d", + __entry->affinity, __entry->mpidr, __entry->midr, + __entry->running_state, __entry->psci_state) +); + /* * Non-Standard Section Report * -- cgit v1.2.3-59-g8ed1b From 621f48e40ee9b0100a802531069166d7d94796e0 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:14 -0600 Subject: arm/arm64: KVM: add guest SEA support Currently external aborts are unsupported by the guest abort handling. Add handling for SEAs so that the host kernel reports SEAs which occur in the guest kernel. When an SEA occurs in the guest kernel, the guest exits and is routed to kvm_handle_guest_abort(). Prior to this patch, a print message of an unsupported FSC would be printed and nothing else would happen. With this patch, the code gets routed to the APEI handling of SEAs in the host kernel to report the SEA information. Signed-off-by: Tyler Baicar Acked-by: Catalin Marinas Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm/include/asm/kvm_arm.h | 10 ++++++++++ arch/arm/include/asm/system_misc.h | 5 +++++ arch/arm64/include/asm/kvm_arm.h | 10 ++++++++++ arch/arm64/include/asm/system_misc.h | 2 ++ arch/arm64/mm/fault.c | 22 ++++++++++++++++++++-- drivers/acpi/apei/ghes.c | 17 +++++++++++------ include/acpi/ghes.h | 2 +- virt/kvm/arm/mmu.c | 36 +++++++++++++++++++++++++++++++++--- 8 files changed, 92 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index a3f0b3d50089..ebf020b02bc8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -187,6 +187,16 @@ #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) +#define FSC_SEA (0x10) +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~0xf) diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index a3d61ad984af..8c4a89f5ce7d 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -22,6 +22,11 @@ extern void (*arm_pm_idle)(void); extern unsigned int user_debug; +static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + return -1; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_ARM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6e99978e83bd..61d694c2eae5 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ 
b/arch/arm64/include/asm/kvm_arm.h @@ -204,6 +204,16 @@ #define FSC_FAULT ESR_ELx_FSC_FAULT #define FSC_ACCESS ESR_ELx_FSC_ACCESS #define FSC_PERM ESR_ELx_FSC_PERM +#define FSC_SEA ESR_ELx_FSC_EXTABT +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index bc812435bc76..95aa4423247d 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -56,6 +56,8 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); __show_ratelimited; \ }) +int handle_guest_sea(phys_addr_t addr, unsigned int esr); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 07481af15fd7..a8c9f2db20ba 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -532,6 +532,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; const struct fault_info *inf; + int ret = 0; inf = esr_to_fault_info(esr); pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", @@ -546,7 +547,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) if (interrupts_enabled(regs)) nmi_enter(); - ghes_notify_sea(); + ret = ghes_notify_sea(); if (interrupts_enabled(regs)) nmi_exit(); @@ -561,7 +562,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) info.si_addr = (void __user *)addr; arm64_notify_die("", regs, &info, esr); - return 0; + return ret; } static const struct fault_info fault_info[] = { @@ -631,6 +632,23 @@ static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "unknown 63" }, }; +/* + * Handle Synchronous External Aborts that occur in a guest kernel. + * + * The return value will be zero if the SEA was successfully handled + * and non-zero if there was an error processing the error or there was + * no error to process. + */ +int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + int ret = -ENOENT; + + if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) + ret = ghes_notify_sea(); + + return ret; +} + /* * Dispatch a data abort to the relevant handler. */ diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 5073d035bb6e..bc717bdf50f1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -816,17 +816,22 @@ static struct notifier_block ghes_notifier_sci = { #ifdef CONFIG_ACPI_APEI_SEA static LIST_HEAD(ghes_sea); -void ghes_notify_sea(void) +/* + * Return 0 only if one of the SEA error sources successfully reported an error + * record sent from the firmware. + */ +int ghes_notify_sea(void) { struct ghes *ghes; + int ret = -ENOENT; - /* - * synchronize_rcu() will wait for nmi_exit(), so no need to - * rcu_read_lock(). 
- */ + rcu_read_lock(); list_for_each_entry_rcu(ghes, &ghes_sea, list) { - ghes_proc(ghes); + if (!ghes_proc(ghes)) + ret = 0; } + rcu_read_unlock(); + return ret; } static void ghes_sea_add(struct ghes *ghes) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 95305aeb13ff..9f26e01186ae 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -113,6 +113,6 @@ static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) return (void *)(gdata) + acpi_hest_get_record_size(gdata); } -void ghes_notify_sea(void); +int ghes_notify_sea(void); #endif /* GHES_H */ diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index a2d63247d1bb..bde0cdd60997 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "trace.h" @@ -1427,6 +1428,25 @@ out: kvm_set_pfn_accessed(pfn); } +static bool is_abort_sea(unsigned long fault_status) +{ + switch (fault_status) { + case FSC_SEA: + case FSC_SEA_TTW0: + case FSC_SEA_TTW1: + case FSC_SEA_TTW2: + case FSC_SEA_TTW3: + case FSC_SECC: + case FSC_SECC_TTW0: + case FSC_SECC_TTW1: + case FSC_SECC_TTW2: + case FSC_SECC_TTW3: + return true; + default: + return false; + } +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1449,19 +1469,29 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn_t gfn; int ret, idx; + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); + + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + + /* + * The host kernel will handle the synchronous external abort. There + * is no need to pass the error into the guest. + */ + if (is_abort_sea(fault_status)) { + if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) + return 1; + } + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { kvm_inject_vabt(vcpu); return 1; } - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM && fault_status != FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", -- cgit v1.2.3-59-g8ed1b From 838955e2a3c010aff9089fd705ae2cd5638cdee8 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Thu, 22 Jun 2017 19:26:03 +0100 Subject: perf: xgene: Parse PMU subnode from the match table This patch parses PMU Subnode from a match table. 
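[Editor's note: the win is extensibility — each _HID carries its PMU type in driver_data, so (as the diff below shows) acpi_pmu_dev_add() no longer needs a strcmp() chain and a future device becomes a one-line table entry. A sketch; "APMC0D99" and PMU_TYPE_NEW are invented for illustration:

/* Hypothetical extension of the match table added by this patch. */
static const struct acpi_device_id example_pmu_acpi_type_match[] = {
	{"APMC0D5D", PMU_TYPE_L3C},
	{"APMC0D5E", PMU_TYPE_IOB},
	{"APMC0D5F", PMU_TYPE_MCB},
	{"APMC0D60", PMU_TYPE_MC},
	{"APMC0D99", PMU_TYPE_NEW},	/* new hardware: a table edit only */
	{},				/* sentinel terminates the scan */
};
]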
Signed-off-by: Hoan Tran Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 35b5289bc5da..5ffd58028a62 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1047,9 +1047,35 @@ err: return NULL; } +static const struct acpi_device_id xgene_pmu_acpi_type_match[] = { + {"APMC0D5D", PMU_TYPE_L3C}, + {"APMC0D5E", PMU_TYPE_IOB}, + {"APMC0D5F", PMU_TYPE_MCB}, + {"APMC0D60", PMU_TYPE_MC}, + {}, +}; + +static const struct acpi_device_id *xgene_pmu_acpi_match_type( + const struct acpi_device_id *ids, + struct acpi_device *adev) +{ + const struct acpi_device_id *match_id = NULL; + const struct acpi_device_id *id; + + for (id = ids; id->id[0] || id->cls; id++) { + if (!acpi_match_device_ids(adev, id)) + match_id = id; + else if (match_id) + break; + } + + return match_id; +} + static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, void *data, void **return_value) { + const struct acpi_device_id *acpi_id; struct xgene_pmu *xgene_pmu = data; struct xgene_pmu_dev_ctx *ctx; struct acpi_device *adev; @@ -1059,17 +1085,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, if (acpi_bus_get_status(adev) || !adev->status.present) return AE_OK; - if (!strcmp(acpi_device_hid(adev), "APMC0D5D")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C); - else if (!strcmp(acpi_device_hid(adev), "APMC0D5E")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB); - else if (!strcmp(acpi_device_hid(adev), "APMC0D5F")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB); - else if (!strcmp(acpi_device_hid(adev), "APMC0D60")) - ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC); - else - ctx = NULL; + acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev); + if (!acpi_id) + return AE_OK; + ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data); if (!ctx) return AE_OK; -- cgit v1.2.3-59-g8ed1b From e35e0a04a9965bcb4fffe4375baaecd408ad2357 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Thu, 22 Jun 2017 19:26:04 +0100 Subject: perf: xgene: Move PMU leaf functions into function pointer structure This patch moves PMU leaf functions into a function pointer structure. It makes the code easier to maintain and extend.
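[Editor's note: the benefit shows up when a second hardware revision arrives — callers go through xgene_pmu->ops, so a new chip only has to supply its own table. A sketch with hypothetical 64-bit counter callbacks; the names below are invented, and the real v3 ops arrive in a later patch:

/* Illustrative only: a second implementation slots in via its own ops. */
static u64 example_read_counter64(struct xgene_pmu_dev *pmu_dev, int idx)
{
	/* A 64-bit counter could be read with a single quadword access. */
	return readq(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (8 * idx));
}

static const struct xgene_pmu_ops example_pmu_v3_ops = {
	.read_counter = example_read_counter64,
	/* ...remaining callbacks plug in the same way... */
};
]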
Signed-off-by: Hoan Tran [Mark: remove redundant cast] Signed-off-by: Mark Rutland [will: make xgene_pmu_ops static] Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 83 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 18 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 5ffd58028a62..53e9e31053d6 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -96,6 +96,23 @@ struct xgene_pmu_dev { struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS]; }; +struct xgene_pmu_ops { + void (*mask_int)(struct xgene_pmu *pmu); + void (*unmask_int)(struct xgene_pmu *pmu); + u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx); + void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val); + void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val); + void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val); + void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val); + void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx); + void (*reset_counters)(struct xgene_pmu_dev *pmu_dev); + void (*start_counters)(struct xgene_pmu_dev *pmu_dev); + void (*stop_counters)(struct xgene_pmu_dev *pmu_dev); +}; + struct xgene_pmu { struct device *dev; int version; @@ -104,6 +121,7 @@ struct xgene_pmu { u32 mc_active_mask; cpumask_t cpu; raw_spinlock_t lock; + const struct xgene_pmu_ops *ops; struct list_head l3cpmus; struct list_head iobpmus; struct list_head mcbpmus; @@ -392,13 +410,14 @@ static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu) writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); } -static inline u32 xgene_pmu_read_counter(struct xgene_pmu_dev *pmu_dev, int idx) +static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev, + int idx) { return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx)); } static inline void -xgene_pmu_write_counter(struct xgene_pmu_dev *pmu_dev, int idx, u32 val) +xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val) { writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx)); } @@ -491,20 +510,22 @@ static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev) static void xgene_perf_pmu_enable(struct pmu *pmu) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; int enabled = bitmap_weight(pmu_dev->cntr_assign_mask, pmu_dev->max_counters); if (!enabled) return; - xgene_pmu_start_counters(pmu_dev); + xgene_pmu->ops->start_counters(pmu_dev); } static void xgene_perf_pmu_disable(struct pmu *pmu) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; - xgene_pmu_stop_counters(pmu_dev); + xgene_pmu->ops->stop_counters(pmu_dev); } static int xgene_perf_event_init(struct perf_event *event) @@ -572,27 +593,32 @@ static int xgene_perf_event_init(struct perf_event *event) static void xgene_perf_enable_event(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; - xgene_pmu_write_evttype(pmu_dev, GET_CNTR(event), GET_EVENTID(event)); - xgene_pmu_write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event))); + xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event), + GET_EVENTID(event)); + 
xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event))); if (pmu_dev->inf->type == PMU_TYPE_IOB) - xgene_pmu_write_agent1msk(pmu_dev, ~((u32)GET_AGENT1ID(event))); + xgene_pmu->ops->write_agent1msk(pmu_dev, + ~((u32)GET_AGENT1ID(event))); - xgene_pmu_enable_counter(pmu_dev, GET_CNTR(event)); - xgene_pmu_enable_counter_int(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event)); } static void xgene_perf_disable_event(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; - xgene_pmu_disable_counter(pmu_dev, GET_CNTR(event)); - xgene_pmu_disable_counter_int(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event)); + xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event)); } static void xgene_perf_event_set_period(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; struct hw_perf_event *hw = &event->hw; /* * The X-Gene PMU counters have a period of 2^32. To account for the @@ -603,18 +629,19 @@ static void xgene_perf_event_set_period(struct perf_event *event) u64 val = 1ULL << 31; local64_set(&hw->prev_count, val); - xgene_pmu_write_counter(pmu_dev, hw->idx, (u32) val); + xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val); } static void xgene_perf_event_update(struct perf_event *event) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; struct hw_perf_event *hw = &event->hw; u64 delta, prev_raw_count, new_raw_count; again: prev_raw_count = local64_read(&hw->prev_count); - new_raw_count = xgene_pmu_read_counter(pmu_dev, GET_CNTR(event)); + new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event)); if (local64_cmpxchg(&hw->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -633,6 +660,7 @@ static void xgene_perf_read(struct perf_event *event) static void xgene_perf_start(struct perf_event *event, int flags) { struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu); + struct xgene_pmu *xgene_pmu = pmu_dev->parent; struct hw_perf_event *hw = &event->hw; if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED))) @@ -646,8 +674,8 @@ static void xgene_perf_start(struct perf_event *event, int flags) if (flags & PERF_EF_RELOAD) { u64 prev_raw_count = local64_read(&hw->prev_count); - xgene_pmu_write_counter(pmu_dev, GET_CNTR(event), - (u32) prev_raw_count); + xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event), + prev_raw_count); } xgene_perf_enable_event(event); @@ -736,8 +764,8 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name) }; /* Hardware counter init */ - xgene_pmu_stop_counters(pmu_dev); - xgene_pmu_reset_counters(pmu_dev); + xgene_pmu->ops->stop_counters(pmu_dev); + xgene_pmu->ops->reset_counters(pmu_dev); return perf_pmu_register(&pmu_dev->pmu, name, -1); } @@ -1255,6 +1283,23 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = { .id = PCP_PMU_V2, }; +static const struct xgene_pmu_ops xgene_pmu_ops = { + .mask_int = xgene_pmu_mask_int, + .unmask_int = xgene_pmu_unmask_int, + .read_counter = xgene_pmu_read_counter32, + .write_counter = xgene_pmu_write_counter32, + .write_evttype = xgene_pmu_write_evttype, + .write_agentmsk = xgene_pmu_write_agentmsk, + .write_agent1msk = xgene_pmu_write_agent1msk, + .enable_counter = xgene_pmu_enable_counter, + .disable_counter = xgene_pmu_disable_counter, + 
.enable_counter_int = xgene_pmu_enable_counter_int, + .disable_counter_int = xgene_pmu_disable_counter_int, + .reset_counters = xgene_pmu_reset_counters, + .start_counters = xgene_pmu_start_counters, + .stop_counters = xgene_pmu_stop_counters, +}; + static const struct of_device_id xgene_pmu_of_match[] = { { .compatible = "apm,xgene-pmu", .data = &xgene_pmu_data }, { .compatible = "apm,xgene-pmu-v2", .data = &xgene_pmu_v2_data }, @@ -1304,6 +1349,8 @@ static int xgene_pmu_probe(struct platform_device *pdev) if (version < 0) return -ENODEV; + xgene_pmu->ops = &xgene_pmu_ops; + INIT_LIST_HEAD(&xgene_pmu->l3cpmus); INIT_LIST_HEAD(&xgene_pmu->iobpmus); INIT_LIST_HEAD(&xgene_pmu->mcbpmus); @@ -1362,7 +1409,7 @@ static int xgene_pmu_probe(struct platform_device *pdev) } /* Enable interrupt */ - xgene_pmu_unmask_int(xgene_pmu); + xgene_pmu->ops->unmask_int(xgene_pmu); return 0; -- cgit v1.2.3-59-g8ed1b From c0f7f7acdecdd7cf9a19c0af5c3dc649e1b934f7 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Thu, 22 Jun 2017 19:26:05 +0100 Subject: perf: xgene: Add support for SoC PMU version 3 This patch adds support for SoC-wide (AKA uncore) Performance Monitoring Unit version 3. It can support up to - 2 IOB PMU instances - 8 L3C PMU instances - 2 MCB PMU instances - 8 MCU PMU instances and these PMUs support 64-bit counters. Signed-off-by: Hoan Tran [Mark: stop counters in _xgene_pmu_isr()] Signed-off-by: Mark Rutland [will: make xgene_pmu_v3_ops static] Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 563 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 534 insertions(+), 29 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 53e9e31053d6..e841282d690c 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -37,6 +37,8 @@ #define CSW_CSWCR 0x0000 #define CSW_CSWCR_DUALMCB_MASK BIT(0) +#define CSW_CSWCR_MCB0_ROUTING(x) (((x) & 0x0C) >> 2) +#define CSW_CSWCR_MCB1_ROUTING(x) (((x) & 0x30) >> 4) #define MCBADDRMR 0x0000 #define MCBADDRMR_DUALMCU_MODE_MASK BIT(2) @@ -50,8 +52,17 @@ #define PCPPMU_INT_L3C BIT(2) #define PCPPMU_INT_IOB BIT(3) +#define PCPPMU_V3_INTMASK 0x00FF33FF +#define PCPPMU_V3_INTENMASK 0xFFFFFFFF +#define PCPPMU_V3_INTCLRMASK 0xFF00CC00 +#define PCPPMU_V3_INT_MCU 0x000000FF +#define PCPPMU_V3_INT_MCB 0x00000300 +#define PCPPMU_V3_INT_L3C 0x00FF0000 +#define PCPPMU_V3_INT_IOB 0x00003000 + #define PMU_MAX_COUNTERS 4 -#define PMU_CNT_MAX_PERIOD 0x100000000ULL +#define PMU_CNT_MAX_PERIOD 0xFFFFFFFFULL +#define PMU_V3_CNT_MAX_PERIOD 0xFFFFFFFFFFFFFFFFULL #define PMU_OVERFLOW_MASK 0xF #define PMU_PMCR_E BIT(0) #define PMU_PMCR_P BIT(1) @@ -73,6 +84,10 @@ #define PMU_PMOVSR 0xC80 #define PMU_PMCR 0xE04 +/* PMU registers for V3 */ +#define PMU_PMOVSCLR 0xC80 +#define PMU_PMOVSSET 0xCC0 + #define to_pmu_dev(p) container_of(p, struct xgene_pmu_dev, pmu) #define GET_CNTR(ev) (ev->hw.idx) #define GET_EVENTID(ev) (ev->hw.config & 0xFFULL) @@ -119,6 +134,7 @@ struct xgene_pmu { void __iomem *pcppmu_csr; u32 mcb_active_mask; u32 mc_active_mask; + u32 l3c_active_mask; cpumask_t cpu; raw_spinlock_t lock; const struct xgene_pmu_ops *ops; @@ -143,11 +159,13 @@ struct xgene_pmu_data { enum xgene_pmu_version { PCP_PMU_V1 = 1, PCP_PMU_V2, + PCP_PMU_V3, }; enum xgene_pmu_dev_type { PMU_TYPE_L3C = 0, PMU_TYPE_IOB, + PMU_TYPE_IOB_SLOW, PMU_TYPE_MCB, PMU_TYPE_MC, }; @@ -213,6 +231,56 @@ static const struct attribute_group mc_pmu_format_attr_group = { .attrs = mc_pmu_format_attrs, }; +static struct attribute *l3c_pmu_v3_format_attrs[] = { +
XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"), + NULL, +}; + +static struct attribute *iob_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"), + NULL, +}; + +static struct attribute *iob_slow_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"), + NULL, +}; + +static struct attribute *mcb_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"), + NULL, +}; + +static struct attribute *mc_pmu_v3_format_attrs[] = { + XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"), + NULL, +}; + +static const struct attribute_group l3c_pmu_v3_format_attr_group = { + .name = "format", + .attrs = l3c_pmu_v3_format_attrs, +}; + +static const struct attribute_group iob_pmu_v3_format_attr_group = { + .name = "format", + .attrs = iob_pmu_v3_format_attrs, +}; + +static const struct attribute_group iob_slow_pmu_v3_format_attr_group = { + .name = "format", + .attrs = iob_slow_pmu_v3_format_attrs, +}; + +static const struct attribute_group mcb_pmu_v3_format_attr_group = { + .name = "format", + .attrs = mcb_pmu_v3_format_attrs, +}; + +static const struct attribute_group mc_pmu_v3_format_attr_group = { + .name = "format", + .attrs = mc_pmu_v3_format_attrs, +}; + /* * sysfs event attributes */ @@ -329,6 +397,219 @@ static const struct attribute_group mc_pmu_events_attr_group = { .attrs = mc_pmu_events_attrs, }; +static struct attribute *l3c_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(read-hit, 0x01), + XGENE_PMU_EVENT_ATTR(read-miss, 0x02), + XGENE_PMU_EVENT_ATTR(index-flush-eviction, 0x03), + XGENE_PMU_EVENT_ATTR(write-caused-replacement, 0x04), + XGENE_PMU_EVENT_ATTR(write-not-caused-replacement, 0x05), + XGENE_PMU_EVENT_ATTR(clean-eviction, 0x06), + XGENE_PMU_EVENT_ATTR(dirty-eviction, 0x07), + XGENE_PMU_EVENT_ATTR(read, 0x08), + XGENE_PMU_EVENT_ATTR(write, 0x09), + XGENE_PMU_EVENT_ATTR(request, 0x0a), + XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall, 0x0b), + XGENE_PMU_EVENT_ATTR(tq-full, 0x0c), + XGENE_PMU_EVENT_ATTR(ackq-full, 0x0d), + XGENE_PMU_EVENT_ATTR(wdb-full, 0x0e), + XGENE_PMU_EVENT_ATTR(odb-full, 0x10), + XGENE_PMU_EVENT_ATTR(wbq-full, 0x11), + XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall, 0x12), + XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall, 0x13), + XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall, 0x14), + XGENE_PMU_EVENT_ATTR(total-insertion, 0x15), + XGENE_PMU_EVENT_ATTR(sip-insertions-r-set, 0x16), + XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear, 0x17), + XGENE_PMU_EVENT_ATTR(dip-insertions-r-set, 0x18), + XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear, 0x19), + XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set, 0x1a), + XGENE_PMU_EVENT_ATTR(egression, 0x1b), + XGENE_PMU_EVENT_ATTR(replacement, 0x1c), + XGENE_PMU_EVENT_ATTR(old-replacement, 0x1d), + XGENE_PMU_EVENT_ATTR(young-replacement, 0x1e), + XGENE_PMU_EVENT_ATTR(r-set-replacement, 0x1f), + XGENE_PMU_EVENT_ATTR(r-clear-replacement, 0x20), + XGENE_PMU_EVENT_ATTR(old-r-replacement, 0x21), + XGENE_PMU_EVENT_ATTR(old-nr-replacement, 0x22), + XGENE_PMU_EVENT_ATTR(young-r-replacement, 0x23), + XGENE_PMU_EVENT_ATTR(young-nr-replacement, 0x24), + XGENE_PMU_EVENT_ATTR(bloomfilter-clearing, 0x25), + XGENE_PMU_EVENT_ATTR(generation-flip, 0x26), + XGENE_PMU_EVENT_ATTR(vcc-droop-detected, 0x27), + NULL, +}; + +static struct attribute *iob_fast_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-all, 0x01), + XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-rd, 0x02), + 
XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-wr, 0x03), + XGENE_PMU_EVENT_ATTR(pa-all-cp-req, 0x04), + XGENE_PMU_EVENT_ATTR(pa-cp-blk-req, 0x05), + XGENE_PMU_EVENT_ATTR(pa-cp-ptl-req, 0x06), + XGENE_PMU_EVENT_ATTR(pa-cp-rd-req, 0x07), + XGENE_PMU_EVENT_ATTR(pa-cp-wr-req, 0x08), + XGENE_PMU_EVENT_ATTR(ba-all-req, 0x09), + XGENE_PMU_EVENT_ATTR(ba-rd-req, 0x0a), + XGENE_PMU_EVENT_ATTR(ba-wr-req, 0x0b), + XGENE_PMU_EVENT_ATTR(pa-rd-shared-req-issued, 0x10), + XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-req-issued, 0x11), + XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-stashable, 0x12), + XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-nonstashable, 0x13), + XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-stashable, 0x14), + XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-nonstashable, 0x15), + XGENE_PMU_EVENT_ATTR(pa-ptl-wr-req, 0x16), + XGENE_PMU_EVENT_ATTR(pa-ptl-rd-req, 0x17), + XGENE_PMU_EVENT_ATTR(pa-wr-back-clean-data, 0x18), + XGENE_PMU_EVENT_ATTR(pa-wr-back-cancelled-on-SS, 0x1b), + XGENE_PMU_EVENT_ATTR(pa-barrier-occurrence, 0x1c), + XGENE_PMU_EVENT_ATTR(pa-barrier-cycles, 0x1d), + XGENE_PMU_EVENT_ATTR(pa-total-cp-snoops, 0x20), + XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop, 0x21), + XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop-hit, 0x22), + XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop, 0x23), + XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop-hit, 0x24), + XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop, 0x25), + XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop-hit, 0x26), + XGENE_PMU_EVENT_ATTR(pa-req-buffer-full, 0x28), + XGENE_PMU_EVENT_ATTR(cswlf-outbound-req-fifo-full, 0x29), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-snoop-fifo-backpressure, 0x2a), + XGENE_PMU_EVENT_ATTR(cswlf-outbound-lack-fifo-full, 0x2b), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-gack-fifo-backpressure, 0x2c), + XGENE_PMU_EVENT_ATTR(cswlf-outbound-data-fifo-full, 0x2d), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-data-fifo-backpressure, 0x2e), + XGENE_PMU_EVENT_ATTR(cswlf-inbound-req-backpressure, 0x2f), + NULL, +}; + +static struct attribute *iob_slow_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(pa-axi0-rd-req, 0x01), + XGENE_PMU_EVENT_ATTR(pa-axi0-wr-req, 0x02), + XGENE_PMU_EVENT_ATTR(pa-axi1-rd-req, 0x03), + XGENE_PMU_EVENT_ATTR(pa-axi1-wr-req, 0x04), + XGENE_PMU_EVENT_ATTR(ba-all-axi-req, 0x07), + XGENE_PMU_EVENT_ATTR(ba-axi-rd-req, 0x08), + XGENE_PMU_EVENT_ATTR(ba-axi-wr-req, 0x09), + XGENE_PMU_EVENT_ATTR(ba-free-list-empty, 0x10), + NULL, +}; + +static struct attribute *mcb_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(req-receive, 0x01), + XGENE_PMU_EVENT_ATTR(rd-req-recv, 0x02), + XGENE_PMU_EVENT_ATTR(rd-req-recv-2, 0x03), + XGENE_PMU_EVENT_ATTR(wr-req-recv, 0x04), + XGENE_PMU_EVENT_ATTR(wr-req-recv-2, 0x05), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu, 0x06), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2, 0x07), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu, 0x08), + XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2, 0x09), + XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a), + XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b), + XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c), + XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req, 0x0d), + XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2, 0x0e), + XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu, 0x0f), + XGENE_PMU_EVENT_ATTR(gack-recv, 0x10), + XGENE_PMU_EVENT_ATTR(rd-gack-recv, 0x11), + XGENE_PMU_EVENT_ATTR(wr-gack-recv, 0x12), + XGENE_PMU_EVENT_ATTR(cancel-rd-gack, 0x13), + 
XGENE_PMU_EVENT_ATTR(cancel-wr-gack, 0x14), + XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall, 0x15), + XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked, 0x16), + XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall, 0x17), + XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked, 0x18), + XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked, 0x19), + XGENE_PMU_EVENT_ATTR(mcu-req-table-full, 0x1a), + XGENE_PMU_EVENT_ATTR(mcu-stat-table-full, 0x1b), + XGENE_PMU_EVENT_ATTR(mcu-wr-table-full, 0x1c), + XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp, 0x1d), + XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp, 0x1e), + XGENE_PMU_EVENT_ATTR(mcu-retryack-resp, 0x1f), + XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp, 0x20), + XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload, 0x21), + XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass, 0x22), + XGENE_PMU_EVENT_ATTR(volt-droop-detect, 0x23), + NULL, +}; + +static struct attribute *mc_pmu_v3_events_attrs[] = { + XGENE_PMU_EVENT_ATTR(cycle-count, 0x00), + XGENE_PMU_EVENT_ATTR(act-sent, 0x01), + XGENE_PMU_EVENT_ATTR(pre-sent, 0x02), + XGENE_PMU_EVENT_ATTR(rd-sent, 0x03), + XGENE_PMU_EVENT_ATTR(rda-sent, 0x04), + XGENE_PMU_EVENT_ATTR(wr-sent, 0x05), + XGENE_PMU_EVENT_ATTR(wra-sent, 0x06), + XGENE_PMU_EVENT_ATTR(pd-entry-vld, 0x07), + XGENE_PMU_EVENT_ATTR(sref-entry-vld, 0x08), + XGENE_PMU_EVENT_ATTR(prea-sent, 0x09), + XGENE_PMU_EVENT_ATTR(ref-sent, 0x0a), + XGENE_PMU_EVENT_ATTR(rd-rda-sent, 0x0b), + XGENE_PMU_EVENT_ATTR(wr-wra-sent, 0x0c), + XGENE_PMU_EVENT_ATTR(raw-hazard, 0x0d), + XGENE_PMU_EVENT_ATTR(war-hazard, 0x0e), + XGENE_PMU_EVENT_ATTR(waw-hazard, 0x0f), + XGENE_PMU_EVENT_ATTR(rar-hazard, 0x10), + XGENE_PMU_EVENT_ATTR(raw-war-waw-hazard, 0x11), + XGENE_PMU_EVENT_ATTR(hprd-lprd-wr-req-vld, 0x12), + XGENE_PMU_EVENT_ATTR(lprd-req-vld, 0x13), + XGENE_PMU_EVENT_ATTR(hprd-req-vld, 0x14), + XGENE_PMU_EVENT_ATTR(hprd-lprd-req-vld, 0x15), + XGENE_PMU_EVENT_ATTR(wr-req-vld, 0x16), + XGENE_PMU_EVENT_ATTR(partial-wr-req-vld, 0x17), + XGENE_PMU_EVENT_ATTR(rd-retry, 0x18), + XGENE_PMU_EVENT_ATTR(wr-retry, 0x19), + XGENE_PMU_EVENT_ATTR(retry-gnt, 0x1a), + XGENE_PMU_EVENT_ATTR(rank-change, 0x1b), + XGENE_PMU_EVENT_ATTR(dir-change, 0x1c), + XGENE_PMU_EVENT_ATTR(rank-dir-change, 0x1d), + XGENE_PMU_EVENT_ATTR(rank-active, 0x1e), + XGENE_PMU_EVENT_ATTR(rank-idle, 0x1f), + XGENE_PMU_EVENT_ATTR(rank-pd, 0x20), + XGENE_PMU_EVENT_ATTR(rank-sref, 0x21), + XGENE_PMU_EVENT_ATTR(queue-fill-gt-thresh, 0x22), + XGENE_PMU_EVENT_ATTR(queue-rds-gt-thresh, 0x23), + XGENE_PMU_EVENT_ATTR(queue-wrs-gt-thresh, 0x24), + XGENE_PMU_EVENT_ATTR(phy-updt-complt, 0x25), + XGENE_PMU_EVENT_ATTR(tz-fail, 0x26), + XGENE_PMU_EVENT_ATTR(dram-errc, 0x27), + XGENE_PMU_EVENT_ATTR(dram-errd, 0x28), + XGENE_PMU_EVENT_ATTR(rd-enq, 0x29), + XGENE_PMU_EVENT_ATTR(wr-enq, 0x2a), + XGENE_PMU_EVENT_ATTR(tmac-limit-reached, 0x2b), + XGENE_PMU_EVENT_ATTR(tmaw-tracker-full, 0x2c), + NULL, +}; + +static const struct attribute_group l3c_pmu_v3_events_attr_group = { + .name = "events", + .attrs = l3c_pmu_v3_events_attrs, +}; + +static const struct attribute_group iob_fast_pmu_v3_events_attr_group = { + .name = "events", + .attrs = iob_fast_pmu_v3_events_attrs, +}; + +static const struct attribute_group iob_slow_pmu_v3_events_attr_group = { + .name = "events", + .attrs = iob_slow_pmu_v3_events_attrs, +}; + +static const struct attribute_group mcb_pmu_v3_events_attr_group = { + .name = "events", + .attrs = mcb_pmu_v3_events_attrs, +}; + +static const struct attribute_group mc_pmu_v3_events_attr_group = { + .name = "events", + .attrs = mc_pmu_v3_events_attrs, +}; + /* * sysfs cpumask attributes */ 
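The tables above feed the standard perf sysfs "events" interface: each named event becomes a file under /sys/bus/event_source/devices/<pmu>/events/ whose contents give the config encoding that the perf tool parses. As a rough sketch of the plumbing (the exact XGENE_PMU_EVENT_ATTR definition lives earlier in xgene_pmu.c and may differ in detail), such an attribute is typically built from a dev_ext_attribute plus a show routine:

/*
 * Sketch only: a typical perf "events" sysfs attribute. The show
 * routine prints the encoding the perf tool parses, e.g.
 * "config=0x2" for the read-miss event above.
 */
static ssize_t xgene_pmu_event_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	struct dev_ext_attribute *eattr = container_of(attr,
					struct dev_ext_attribute, attr);

	return sprintf(buf, "config=0x%lx\n", (unsigned long)eattr->var);
}

#define XGENE_PMU_EVENT_ATTR(_name, _config)				\
	(&((struct dev_ext_attribute[]) {				\
		{ .attr = __ATTR(_name, 0444, xgene_pmu_event_show, NULL), \
		  .var = (void *)_config, }				\
	})[0].attr.attr)

Once the v3 devices are registered, these events can be counted system-wide with the perf tool, e.g. perf stat -a -e l3c0/read-hit/.
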
@@ -352,7 +633,7 @@ static const struct attribute_group pmu_cpumask_attr_group = { }; /* - * Per PMU device attribute groups + * Per PMU device attribute groups of PMU v1 and v2 */ static const struct attribute_group *l3c_pmu_attr_groups[] = { &l3c_pmu_format_attr_group, @@ -382,6 +663,44 @@ static const struct attribute_group *mc_pmu_attr_groups[] = { NULL }; +/* + * Per PMU device attribute groups of PMU v3 + */ +static const struct attribute_group *l3c_pmu_v3_attr_groups[] = { + &l3c_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &l3c_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = { + &iob_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &iob_fast_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = { + &iob_slow_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &iob_slow_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *mcb_pmu_v3_attr_groups[] = { + &mcb_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &mcb_pmu_v3_events_attr_group, + NULL +}; + +static const struct attribute_group *mc_pmu_v3_attr_groups[] = { + &mc_pmu_v3_format_attr_group, + &pmu_cpumask_attr_group, + &mc_pmu_v3_events_attr_group, + NULL +}; + static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev) { int cntr; @@ -405,23 +724,66 @@ static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu) writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); } +static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu) +{ + writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); +} + static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu) { writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); } +static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu) +{ + writel(PCPPMU_V3_INTCLRMASK, + xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG); +} + static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev, int idx) { return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx)); } +static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev, + int idx) +{ + u32 lo, hi; + + /* + * v3 has 64-bit counter registers composed by 2 32-bit registers + * This can be a problem if the counter increases and carries + * out of bit [31] between 2 reads. The extra reads would help + * to prevent this issue. 
+ */ + do { + hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1); + lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx); + } while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1)); + + return (((u64)hi << 32) | lo); +} + static inline void xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val) { writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx)); } +static inline void +xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val) +{ + u32 cnt_lo, cnt_hi; + + cnt_hi = upper_32_bits(val); + cnt_lo = lower_32_bits(val); + + /* v3 has 64-bit counter registers composed by 2 32-bit registers */ + xgene_pmu_write_counter32(pmu_dev, 2 * idx, cnt_lo); + xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, cnt_hi); +} + static inline void xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val) { @@ -434,12 +796,18 @@ xgene_pmu_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) writel(val, pmu_dev->inf->csr + PMU_PMAMR0); } +static inline void +xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { } + static inline void xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { writel(val, pmu_dev->inf->csr + PMU_PMAMR1); } +static inline void +xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { } + static inline void xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx) { @@ -621,10 +989,11 @@ static void xgene_perf_event_set_period(struct perf_event *event) struct xgene_pmu *xgene_pmu = pmu_dev->parent; struct hw_perf_event *hw = &event->hw; /* - * The X-Gene PMU counters have a period of 2^32. To account for the - * possiblity of extreme interrupt latency we program for a period of - * half that. Hopefully we can handle the interrupt before another 2^31 + * For 32 bit counter, it has a period of 2^32. To account for the + * possibility of extreme interrupt latency we program for a period of + * half that. Hopefully, we can handle the interrupt before another 2^31 * events occur and the counter overtakes its previous value. + * For 64 bit counter, we don't expect it overflow. 
*/ u64 val = 1ULL << 31; @@ -741,7 +1110,10 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name) { struct xgene_pmu *xgene_pmu; - pmu_dev->max_period = PMU_CNT_MAX_PERIOD - 1; + if (pmu_dev->parent->version == PCP_PMU_V3) + pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD; + else + pmu_dev->max_period = PMU_CNT_MAX_PERIOD; /* First version PMU supports only single event counter */ xgene_pmu = pmu_dev->parent; if (xgene_pmu->version == PCP_PMU_V1) @@ -786,20 +1158,38 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx) switch (pmu->inf->type) { case PMU_TYPE_L3C: - pmu->attr_groups = l3c_pmu_attr_groups; + if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask)) + goto dev_err; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = l3c_pmu_v3_attr_groups; + else + pmu->attr_groups = l3c_pmu_attr_groups; break; case PMU_TYPE_IOB: - pmu->attr_groups = iob_pmu_attr_groups; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = iob_fast_pmu_v3_attr_groups; + else + pmu->attr_groups = iob_pmu_attr_groups; + break; + case PMU_TYPE_IOB_SLOW: + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = iob_slow_pmu_v3_attr_groups; break; case PMU_TYPE_MCB: if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask)) goto dev_err; - pmu->attr_groups = mcb_pmu_attr_groups; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = mcb_pmu_v3_attr_groups; + else + pmu->attr_groups = mcb_pmu_attr_groups; break; case PMU_TYPE_MC: if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask)) goto dev_err; - pmu->attr_groups = mc_pmu_attr_groups; + if (xgene_pmu->version == PCP_PMU_V3) + pmu->attr_groups = mc_pmu_v3_attr_groups; + else + pmu->attr_groups = mc_pmu_attr_groups; break; default: return -EINVAL; @@ -823,18 +1213,27 @@ dev_err: static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev) { struct xgene_pmu *xgene_pmu = pmu_dev->parent; + void __iomem *csr = pmu_dev->inf->csr; u32 pmovsr; int idx; - pmovsr = readl(pmu_dev->inf->csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK; + xgene_pmu->ops->stop_counters(pmu_dev); + + if (xgene_pmu->version == PCP_PMU_V3) + pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK; + else + pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK; + if (!pmovsr) - return; + goto out; /* Clear interrupt flag */ if (xgene_pmu->version == PCP_PMU_V1) - writel(0x0, pmu_dev->inf->csr + PMU_PMOVSR); + writel(0x0, csr + PMU_PMOVSR); + else if (xgene_pmu->version == PCP_PMU_V2) + writel(pmovsr, csr + PMU_PMOVSR); else - writel(pmovsr, pmu_dev->inf->csr + PMU_PMOVSR); + writel(pmovsr, csr + PMU_PMOVSCLR); for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) { struct perf_event *event = pmu_dev->pmu_counter_event[idx]; @@ -846,10 +1245,14 @@ static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev) xgene_perf_event_update(event); xgene_perf_event_set_period(event); } + +out: + xgene_pmu->ops->start_counters(pmu_dev); } static irqreturn_t xgene_pmu_isr(int irq, void *dev_id) { + u32 intr_mcu, intr_mcb, intr_l3c, intr_iob; struct xgene_pmu_dev_ctx *ctx; struct xgene_pmu *xgene_pmu = dev_id; unsigned long flags; @@ -859,22 +1262,33 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id) /* Get Interrupt PMU source */ val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG); - if (val & PCPPMU_INT_MCU) { + if (xgene_pmu->version == PCP_PMU_V3) { + intr_mcu = PCPPMU_V3_INT_MCU; + intr_mcb = PCPPMU_V3_INT_MCB; + intr_l3c = PCPPMU_V3_INT_L3C; + intr_iob = PCPPMU_V3_INT_IOB; + } else { + intr_mcu = PCPPMU_INT_MCU; + intr_mcb = 
PCPPMU_INT_MCB; + intr_l3c = PCPPMU_INT_L3C; + intr_iob = PCPPMU_INT_IOB; + } + if (val & intr_mcu) { list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } } - if (val & PCPPMU_INT_MCB) { + if (val & intr_mcb) { list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } } - if (val & PCPPMU_INT_L3C) { + if (val & intr_l3c) { list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } } - if (val & PCPPMU_INT_IOB) { + if (val & intr_iob) { list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) { _xgene_pmu_isr(irq, ctx->pmu_dev); } @@ -885,8 +1299,8 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id) return IRQ_HANDLED; } -static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, - struct platform_device *pdev) +static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) { void __iomem *csw_csr, *mcba_csr, *mcbb_csr; struct resource *res; @@ -913,6 +1327,8 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return PTR_ERR(mcbb_csr); } + xgene_pmu->l3c_active_mask = 0x1; + reg = readl(csw_csr + CSW_CSWCR); if (reg & CSW_CSWCR_DUALMCB_MASK) { /* Dual MCB active */ @@ -933,8 +1349,56 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return 0; } -static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, - struct platform_device *pdev) +static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) +{ + void __iomem *csw_csr; + struct resource *res; + unsigned int reg; + u32 mcb0routing; + u32 mcb1routing; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + csw_csr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(csw_csr)) { + dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n"); + return PTR_ERR(csw_csr); + } + + reg = readl(csw_csr + CSW_CSWCR); + mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg); + mcb1routing = CSW_CSWCR_MCB1_ROUTING(reg); + if (reg & CSW_CSWCR_DUALMCB_MASK) { + /* Dual MCB active */ + xgene_pmu->mcb_active_mask = 0x3; + /* Probe all active L3C(s), maximum is 8 */ + xgene_pmu->l3c_active_mask = 0xFF; + /* Probe all active MC(s), maximum is 8 */ + if ((mcb0routing == 0x2) && (mcb1routing == 0x2)) + xgene_pmu->mc_active_mask = 0xFF; + else if ((mcb0routing == 0x1) && (mcb1routing == 0x1)) + xgene_pmu->mc_active_mask = 0x33; + else + xgene_pmu->mc_active_mask = 0x11; + } else { + /* Single MCB active */ + xgene_pmu->mcb_active_mask = 0x1; + /* Probe all active L3C(s), maximum is 4 */ + xgene_pmu->l3c_active_mask = 0x0F; + /* Probe all active MC(s), maximum is 4 */ + if (mcb0routing == 0x2) + xgene_pmu->mc_active_mask = 0x0F; + else if (mcb0routing == 0x1) + xgene_pmu->mc_active_mask = 0x03; + else + xgene_pmu->mc_active_mask = 0x01; + } + + return 0; +} + +static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) { struct regmap *csw_map, *mcba_map, *mcbb_map; struct device_node *np = pdev->dev.of_node; @@ -958,6 +1422,7 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return PTR_ERR(mcbb_map); } + xgene_pmu->l3c_active_mask = 0x1; if (regmap_read(csw_map, CSW_CSWCR, ®)) return -EINVAL; @@ -982,12 +1447,18 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, return 0; } -static int xgene_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu, - struct platform_device *pdev) +static int 
xgene_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, + struct platform_device *pdev) { - if (has_acpi_companion(&pdev->dev)) - return acpi_pmu_probe_active_mcb_mcu(xgene_pmu, pdev); - return fdt_pmu_probe_active_mcb_mcu(xgene_pmu, pdev); + if (has_acpi_companion(&pdev->dev)) { + if (xgene_pmu->version == PCP_PMU_V3) + return acpi_pmu_v3_probe_active_mcb_mcu_l3c(xgene_pmu, + pdev); + else + return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, + pdev); + } + return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev); } static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) @@ -997,6 +1468,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id); case PMU_TYPE_IOB: return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id); + case PMU_TYPE_IOB_SLOW: + return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id); case PMU_TYPE_MCB: return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id); case PMU_TYPE_MC: @@ -1080,6 +1553,11 @@ static const struct acpi_device_id xgene_pmu_acpi_type_match[] = { {"APMC0D5E", PMU_TYPE_IOB}, {"APMC0D5F", PMU_TYPE_MCB}, {"APMC0D60", PMU_TYPE_MC}, + {"APMC0D84", PMU_TYPE_L3C}, + {"APMC0D85", PMU_TYPE_IOB}, + {"APMC0D86", PMU_TYPE_IOB_SLOW}, + {"APMC0D87", PMU_TYPE_MCB}, + {"APMC0D88", PMU_TYPE_MC}, {}, }; @@ -1134,6 +1612,9 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level, case PMU_TYPE_IOB: list_add(&ctx->next, &xgene_pmu->iobpmus); break; + case PMU_TYPE_IOB_SLOW: + list_add(&ctx->next, &xgene_pmu->iobpmus); + break; case PMU_TYPE_MCB: list_add(&ctx->next, &xgene_pmu->mcbpmus); break; @@ -1255,6 +1736,9 @@ static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu, case PMU_TYPE_IOB: list_add(&ctx->next, &xgene_pmu->iobpmus); break; + case PMU_TYPE_IOB_SLOW: + list_add(&ctx->next, &xgene_pmu->iobpmus); + break; case PMU_TYPE_MCB: list_add(&ctx->next, &xgene_pmu->mcbpmus); break; @@ -1300,6 +1784,23 @@ static const struct xgene_pmu_ops xgene_pmu_ops = { .stop_counters = xgene_pmu_stop_counters, }; +static const struct xgene_pmu_ops xgene_pmu_v3_ops = { + .mask_int = xgene_pmu_v3_mask_int, + .unmask_int = xgene_pmu_v3_unmask_int, + .read_counter = xgene_pmu_read_counter64, + .write_counter = xgene_pmu_write_counter64, + .write_evttype = xgene_pmu_write_evttype, + .write_agentmsk = xgene_pmu_v3_write_agentmsk, + .write_agent1msk = xgene_pmu_v3_write_agent1msk, + .enable_counter = xgene_pmu_enable_counter, + .disable_counter = xgene_pmu_disable_counter, + .enable_counter_int = xgene_pmu_enable_counter_int, + .disable_counter_int = xgene_pmu_disable_counter_int, + .reset_counters = xgene_pmu_reset_counters, + .start_counters = xgene_pmu_start_counters, + .stop_counters = xgene_pmu_stop_counters, +}; + static const struct of_device_id xgene_pmu_of_match[] = { { .compatible = "apm,xgene-pmu", .data = &xgene_pmu_data }, { .compatible = "apm,xgene-pmu-v2", .data = &xgene_pmu_v2_data }, @@ -1310,6 +1811,7 @@ MODULE_DEVICE_TABLE(of, xgene_pmu_of_match); static const struct acpi_device_id xgene_pmu_acpi_match[] = { {"APMC0D5B", PCP_PMU_V1}, {"APMC0D5C", PCP_PMU_V2}, + {"APMC0D83", PCP_PMU_V3}, {}, }; MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match); @@ -1349,7 +1851,10 @@ static int xgene_pmu_probe(struct platform_device *pdev) if (version < 0) return -ENODEV; - xgene_pmu->ops = &xgene_pmu_ops; + if (version == PCP_PMU_V3) + xgene_pmu->ops = &xgene_pmu_v3_ops; + else + xgene_pmu->ops = &xgene_pmu_ops; INIT_LIST_HEAD(&xgene_pmu->l3cpmus); INIT_LIST_HEAD(&xgene_pmu->iobpmus); @@ 
-1384,7 +1889,7 @@ static int xgene_pmu_probe(struct platform_device *pdev) raw_spin_lock_init(&xgene_pmu->lock); /* Check for active MCBs and MCUs */ - rc = xgene_pmu_probe_active_mcb_mcu(xgene_pmu, pdev); + rc = xgene_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev); if (rc) { dev_warn(&pdev->dev, "Unknown MCB/MCU active status\n"); xgene_pmu->mcb_active_mask = 0x1; -- cgit v1.2.3-59-g8ed1b From 77b246b32b2c4bc21e352dcb8b53a8aba81ee5a4 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:15 -0600 Subject: acpi: apei: check for pending errors when probing GHES entries Check for pending errors when probing GHES entries. It is possible that a fatal error is already pending at this point, so we should handle it as soon as the driver is probed. This also avoids a potential issue if there was an interrupt that was already cleared for an error since the GHES driver wasn't present. Signed-off-by: Tyler Baicar Signed-off-by: Will Deacon --- drivers/acpi/apei/ghes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index bc717bdf50f1..bb830444df28 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1179,6 +1179,9 @@ static int ghes_probe(struct platform_device *ghes_dev) } platform_set_drvdata(ghes_dev, ghes); + /* Handle any pending errors right away */ + ghes_proc(ghes); + return 0; err_edac_unreg: ghes_edac_unregister(ghes); -- cgit v1.2.3-59-g8ed1b From 33f082614c3443d937f50fe936f284f62bbb4a1b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 20 Jun 2017 18:23:39 +0100 Subject: arm64: signal: Allow expansion of the signal frame This patch defines an extra_context signal frame record that can be used to describe an expanded signal frame, and modifies the context block allocator and signal frame setup and parsing code to create, populate, parse and decode this block as necessary. To avoid abuse by userspace, parse_user_sigframe() attempts to ensure that: * no more than one extra_context is accepted; * the extra context data is a sensible size, and properly placed and aligned. The extra_context data is required to start at the first 16-byte aligned address immediately after the dummy terminator record following extra_context in rt_sigframe.__reserved[] (as ensured during signal delivery). This serves as a sanity-check that the signal frame has not been moved or copied without taking the extra data into account. Signed-off-by: Dave Martin Reviewed-by: Catalin Marinas [will: add __force annotation when casting extra_datap to __user pointer] Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/sigcontext.h | 38 +++++- arch/arm64/kernel/signal.c | 195 ++++++++++++++++++++++++++++--- 2 files changed, 214 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 1328a2c14371..f0a76b9fcd6e 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -41,9 +41,10 @@ struct sigcontext { * * 0x210 fpsimd_context * 0x10 esr_context + * 0x20 extra_context (optional) * 0x10 terminator (null _aarch64_ctx) * - * 0xdd0 (reserved for future allocation) + * 0xdb0 (reserved for future allocation) * * New records that can exceed this space need to be opt-in for userspace, so * that an expanded signal frame is not generated unexpectedly. 
The mechanism @@ -80,4 +81,39 @@ struct esr_context { __u64 esr; }; +/* + * extra_context: describes extra space in the signal frame for + * additional structures that don't fit in sigcontext.__reserved[]. + * + * Note: + * + * 1) fpsimd_context, esr_context and extra_context must be placed in + * sigcontext.__reserved[] if present. They cannot be placed in the + * extra space. Any other record can be placed either in the extra + * space or in sigcontext.__reserved[], unless otherwise specified in + * this file. + * + * 2) There must not be more than one extra_context. + * + * 3) If extra_context is present, it must be followed immediately in + * sigcontext.__reserved[] by the terminating null _aarch64_ctx. + * + * 4) The extra space to which datap points must start at the first + * 16-byte aligned address immediately after the terminating null + * _aarch64_ctx that follows the extra_context structure in + * __reserved[]. The extra space may overrun the end of __reserved[], + * as indicated by a sufficiently large value for the size field. + * + * 5) The extra space must itself be terminated with a null + * _aarch64_ctx. + */ +#define EXTRA_MAGIC 0x45585401 + +struct extra_context { + struct _aarch64_ctx head; + __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */ + __u32 size; /* size in bytes of the extra space */ + __u32 __reserved[3]; +}; + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index fa787e6ac7c2..089c3747995d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -60,18 +61,27 @@ struct rt_sigframe_user_layout { unsigned long fpsimd_offset; unsigned long esr_offset; + unsigned long extra_offset; unsigned long end_offset; }; +#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) +#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) +#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) + static void init_user_layout(struct rt_sigframe_user_layout *user) { + const size_t reserved_size = + sizeof(user->sigframe->uc.uc_mcontext.__reserved); + memset(user, 0, sizeof(*user)); user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved); - user->limit = user->size + - sizeof(user->sigframe->uc.uc_mcontext.__reserved) - - round_up(sizeof(struct _aarch64_ctx), 16); - /* ^ reserve space for terminator */ + user->limit = user->size + reserved_size; + + user->limit -= TERMINATOR_SIZE; + user->limit -= EXTRA_CONTEXT_SIZE; + /* Reserve space for extension and terminator ^ */ } static size_t sigframe_size(struct rt_sigframe_user_layout const *user) @@ -79,6 +89,52 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); } +/* + * Sanity limit on the approximate maximum size of signal frame we'll + * try to generate. Stack alignment padding and the frame record are + * not taken into account. This limit is not a guarantee and is + * NOT ABI. 
+ */ +#define SIGFRAME_MAXSZ SZ_64K + +static int __sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size, bool extend) +{ + size_t padded_size = round_up(size, 16); + + if (padded_size > user->limit - user->size && + !user->extra_offset && + extend) { + int ret; + + user->limit += EXTRA_CONTEXT_SIZE; + ret = __sigframe_alloc(user, &user->extra_offset, + sizeof(struct extra_context), false); + if (ret) { + user->limit -= EXTRA_CONTEXT_SIZE; + return ret; + } + + /* Reserve space for the __reserved[] terminator */ + user->size += TERMINATOR_SIZE; + + /* + * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for + * the terminator: + */ + user->limit = SIGFRAME_MAXSZ - TERMINATOR_SIZE; + } + + /* Still not enough space? Bad luck! */ + if (padded_size > user->limit - user->size) + return -ENOMEM; + + *offset = user->size; + user->size += padded_size; + + return 0; +} + /* * Allocate space for an optional record of bytes in the user * signal frame. The offset from the signal frame base address to the @@ -87,11 +143,24 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) static int sigframe_alloc(struct rt_sigframe_user_layout *user, unsigned long *offset, size_t size) { - size_t padded_size = round_up(size, 16); + return __sigframe_alloc(user, offset, size, true); +} - *offset = user->size; - user->size += padded_size; +/* Allocate the null terminator record and prevent further allocations */ +static int sigframe_alloc_end(struct rt_sigframe_user_layout *user) +{ + int ret; + + /* Un-reserve the space reserved for the terminator: */ + user->limit += TERMINATOR_SIZE; + ret = sigframe_alloc(user, &user->end_offset, + sizeof(struct _aarch64_ctx)); + if (ret) + return ret; + + /* Prevent further allocation: */ + user->limit = user->size; return 0; } @@ -162,6 +231,8 @@ static int parse_user_sigframe(struct user_ctxs *user, char __user *base = (char __user *)&sc->__reserved; size_t offset = 0; size_t limit = sizeof(sc->__reserved); + bool have_extra_context = false; + char const __user *const sfp = (char const __user *)sf; user->fpsimd = NULL; @@ -171,6 +242,12 @@ static int parse_user_sigframe(struct user_ctxs *user, while (1) { int err = 0; u32 magic, size; + char const __user *userp; + struct extra_context const __user *extra; + u64 extra_datap; + u32 extra_size; + struct _aarch64_ctx const __user *end; + u32 end_magic, end_size; if (limit - offset < sizeof(*head)) goto invalid; @@ -208,6 +285,64 @@ static int parse_user_sigframe(struct user_ctxs *user, /* ignore */ break; + case EXTRA_MAGIC: + if (have_extra_context) + goto invalid; + + if (size < sizeof(*extra)) + goto invalid; + + userp = (char const __user *)head; + + extra = (struct extra_context const __user *)userp; + userp += size; + + __get_user_error(extra_datap, &extra->datap, err); + __get_user_error(extra_size, &extra->size, err); + if (err) + return err; + + /* Check for the dummy terminator in __reserved[]: */ + + if (limit - offset - size < TERMINATOR_SIZE) + goto invalid; + + end = (struct _aarch64_ctx const __user *)userp; + userp += TERMINATOR_SIZE; + + __get_user_error(end_magic, &end->magic, err); + __get_user_error(end_size, &end->size, err); + if (err) + return err; + + if (end_magic || end_size) + goto invalid; + + /* Prevent looping/repeated parsing of extra_context */ + have_extra_context = true; + + base = (__force void __user *)extra_datap; + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + + if (!IS_ALIGNED(extra_size, 16)) + goto invalid; + + if 
(base != userp) + goto invalid; + + /* Reject "unreasonably large" frames: */ + if (extra_size > sfp + SIGFRAME_MAXSZ - userp) + goto invalid; + + /* + * Ignore trailing terminator in __reserved[] + * and start parsing extra data: + */ + offset = 0; + limit = extra_size; + continue; + default: goto invalid; } @@ -318,17 +453,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return err; } - /* - * Allocate space for the terminator record. - * HACK: here we undo the reservation of space for the end record. - * This bodge should be replaced with a cleaner approach later on. - */ - user->limit = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved) + - sizeof(user->sigframe->uc.uc_mcontext.__reserved); - - err = sigframe_alloc(user, &user->end_offset, - sizeof(struct _aarch64_ctx)); - return err; + return sigframe_alloc_end(user); } @@ -369,6 +494,40 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); } + if (err == 0 && user->extra_offset) { + char __user *sfp = (char __user *)user->sigframe; + char __user *userp = + apply_user_offset(user, user->extra_offset); + + struct extra_context __user *extra; + struct _aarch64_ctx __user *end; + u64 extra_datap; + u32 extra_size; + + extra = (struct extra_context __user *)userp; + userp += EXTRA_CONTEXT_SIZE; + + end = (struct _aarch64_ctx __user *)userp; + userp += TERMINATOR_SIZE; + + /* + * extra_datap is just written to the signal frame. + * The value gets cast back to a void __user * + * during sigreturn. + */ + extra_datap = (__force u64)userp; + extra_size = sfp + round_up(user->size, 16) - userp; + + __put_user_error(EXTRA_MAGIC, &extra->head.magic, err); + __put_user_error(EXTRA_CONTEXT_SIZE, &extra->head.size, err); + __put_user_error(extra_datap, &extra->datap, err); + __put_user_error(extra_size, &extra->size, err); + + /* Add the terminator */ + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); + } + /* set the "end" magic */ if (err == 0) { struct _aarch64_ctx __user *end = -- cgit v1.2.3-59-g8ed1b From 8486e54d30e170c969090f80bbdfa7142d33ed6a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 23 Jun 2017 18:02:06 +0100 Subject: arm64: ftrace: fix !CONFIG_ARM64_MODULE_PLTS kernels When a kernel is built without CONFIG_ARM64_MODULE_PLTS, we don't generate the expected branch instruction in ftrace_make_nop(). This means we pass zero (rather than a valid branch) to ftrace_modify_code() as the expected instruction to validate. This causes us to return -EINVAL to the core ftrace code for a valid case, resulting in a splat at boot time. This was an unintended effect of commit: 687644209a6e9557 ("arm64: ftrace: fix building without CONFIG_MODULES") ... which incorrectly moved the generation of the branch instruction into the ifdef for CONFIG_ARM64_MODULE_PLTS. This patch fixes the issue by moving the ifdef inside of the relevant if-else case, and always checking that the branch is in range, regardless of CONFIG_ARM64_MODULE_PLTS. This ensures that we generate the expected branch instruction, and also improves our sanity checks. For consistency, both ftrace_make_nop() and ftrace_make_call() are updated with this pattern. 
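Sketched in isolation, both functions now take the shape below; get_module_plt_addr() is a hypothetical stand-in for the real trampoline lookup, which the diff that follows shows in full:

	unsigned long pc = rec->ip;
	long offset = (long)pc - (long)addr;

	if (offset < -SZ_128M || offset >= SZ_128M) {
#ifdef CONFIG_ARM64_MODULE_PLTS
		/* Out-of-range branch: redirect via the module's PLT. */
		addr = get_module_plt_addr(mod, addr);	/* hypothetical helper */
#else
		/* Without PLTs, an out-of-range branch cannot be encoded. */
		return -EINVAL;
#endif
	}

	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);

The range check now runs on every kernel configuration, so a branch that cannot be encoded is always caught instead of silently producing an invalid expected instruction.
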
Fixes: 687644209a6e9557 ("arm64: ftrace: fix building without CONFIG_MODULES") Signed-off-by: Mark Rutland Reported-by: Marc Zyngier Reviewed-by: Ard Biesheuvel Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ftrace.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 401aa27808a4..c13b1fca0e5b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -72,11 +72,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; u32 old, new; - -#ifdef CONFIG_ARM64_MODULE_PLTS long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS unsigned long *trampoline; struct module *mod; @@ -121,8 +120,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) smp_wmb(); } addr = (unsigned long)&trampoline[1]; - } +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ + } old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -139,11 +140,10 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip; bool validate = true; u32 old = 0, new; - -#ifdef CONFIG_ARM64_MODULE_PLTS long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS u32 replaced; /* @@ -176,11 +176,13 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EINVAL; validate = false; +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; +#endif /* CONFIG_ARM64_MODULE_PLTS */ } else { old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); } -#endif /* CONFIG_ARM64_MODULE_PLTS */ new = aarch64_insn_gen_nop(); -- cgit v1.2.3-59-g8ed1b From bcde519e8c325f3cc1fcf443eb6466e6bb3a3aca Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sat, 24 Jun 2017 17:42:11 +0200 Subject: arm64: pass endianness info to sparse ARM64 depends on the macro __AARCH64EB__ being defined or not to correctly select or define endian-specific macros, structures or pieces of code. This macro is predefined by the compiler but sparse knows nothing about it and thus may pre-process files differently from what gcc would. Fix this by passing '-D__AARCH64EL__' or '-D__AARCH64EB__' to sparse depending of the endianness of the kernel, like defined by GCC. Note: In most case it won't change anything since most arm64 use little-endian (but an allyesconfig would use big-endian!). 
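For an idea of what goes wrong without the predefine, arm64 headers contain endian-conditional definitions along these lines (illustrative sketch):

#ifdef __AARCH64EB__
#define COMPAT_ELF_PLATFORM	("v8b")
#else
#define COMPAT_ELF_PLATFORM	("v8l")
#endif

gcc predefines __AARCH64EB__ when building big-endian, but sparse would previously see neither macro and always fall through to the little-endian branch, checking code that a CONFIG_CPU_BIG_ENDIAN=y build never compiles.
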
CC: Catalin Marinas CC: Will Deacon CC: linux-arm-kernel@lists.infradead.org Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index fdb0133142ff..9b41f1e3b1a0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -52,11 +52,13 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian +CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian +CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL UTS_MACHINE := aarch64 -- cgit v1.2.3-59-g8ed1b From 0607512d0a8d7fac86667466b884095e04b10a59 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Jun 2017 17:35:41 +0200 Subject: ras: mark stub functions as 'inline' With CONFIG_RAS disabled, we get two harmless warnings about unused functions: include/linux/ras.h:37:13: error: 'log_arm_hw_error' defined but not used [-Werror=unused-function] static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } include/linux/ras.h:33:13: error: 'log_non_standard_event' defined but not used [-Werror=unused-function] static void log_non_standard_event(const guid_t *sec_type, Clearly these are meant to be 'inline', like the other stubs in the same header. Fixes: 297b64c74385 ("ras: acpi / apei: generate trace event for unrecognized CPER section") Fixes: e9279e83ad1f ("trace, ras: add ARM processor error trace event") Acked-by: Borislav Petkov Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- include/linux/ras.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/include/linux/ras.h b/include/linux/ras.h index 7d61863ff265..be5338a35d57 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -30,11 +30,13 @@ void log_non_standard_event(const guid_t *sec_type, const u8 sev, const u8 *err, const u32 len); void log_arm_hw_error(struct cper_sec_proc_arm *err); #else -static void log_non_standard_event(const guid_t *sec_type, - const guid_t *fru_id, const char *fru_text, - const u8 sev, const u8 *err, - const u32 len) { return; } -static void log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } +static inline void +log_non_standard_event(const guid_t *sec_type, + const guid_t *fru_id, const char *fru_text, + const u8 sev, const u8 *err, const u32 len) +{ return; } +static inline void +log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #endif #endif /* __RAS_H__ */ -- cgit v1.2.3-59-g8ed1b From a5018b0e6f036a598e55371e9135e287dc3b25e5 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:55:52 +0200 Subject: arm64: fix endianness annotation for debug-monitors.c Here we're reading thumb or ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to native order but the intermediate value should be annotated as for little-endian values. Fix this by declaring the intermediate var as __le32 or __le16. 
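The idiom being enforced is to keep the raw load in a byte-order-annotated type and produce the native value only through the conversion helper (minimal sketch):

	__le16 instr;		/* raw halfword, little-endian in memory */
	u32 thumb_instr;	/* native order, safe to compare against */

	get_user(instr, (__le16 __user *)pc);
	thumb_instr = le16_to_cpu(instr);

With the types split this way, sparse can flag any use of the raw __le16 where a native value is expected, and vice versa.
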
Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index d618e25c3de1..c7ef99904934 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -341,20 +341,22 @@ int aarch32_break_handler(struct pt_regs *regs) if (compat_thumb_mode(regs)) { /* get 16-bit Thumb instruction */ - get_user(thumb_instr, (u16 __user *)pc); - thumb_instr = le16_to_cpu(thumb_instr); + __le16 instr; + get_user(instr, (__le16 __user *)pc); + thumb_instr = le16_to_cpu(instr); if (thumb_instr == AARCH32_BREAK_THUMB2_LO) { /* get second half of 32-bit Thumb-2 instruction */ - get_user(thumb_instr, (u16 __user *)(pc + 2)); - thumb_instr = le16_to_cpu(thumb_instr); + get_user(instr, (__le16 __user *)(pc + 2)); + thumb_instr = le16_to_cpu(instr); bp = thumb_instr == AARCH32_BREAK_THUMB2_HI; } else { bp = thumb_instr == AARCH32_BREAK_THUMB; } } else { /* 32-bit ARM instruction */ - get_user(arm_instr, (u32 __user *)pc); - arm_instr = le32_to_cpu(arm_instr); + __le32 instr; + get_user(instr, (__le32 __user *)pc); + arm_instr = le32_to_cpu(instr); bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM; } -- cgit v1.2.3-59-g8ed1b From 6cf5d4af83e04f4cfae91bfdefd9d4d6949c09b2 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:55:55 +0200 Subject: arm64: fix endianness annotation in call_undef_hook() Here we're reading thumb or ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to native order but the intermediate value should be annotated as for little-endian values. Fix this by declaring the intermediate var as __le32 or __le16. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3ebfb1d00b53..c7c7088097be 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -344,22 +344,24 @@ static int call_undef_hook(struct pt_regs *regs) if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ - if (get_user(instr, (u16 __user *)pc)) + __le16 instr_le; + if (get_user(instr_le, (__le16 __user *)pc)) goto exit; - instr = le16_to_cpu(instr); + instr = le16_to_cpu(instr_le); if (aarch32_insn_is_wide(instr)) { u32 instr2; - if (get_user(instr2, (u16 __user *)(pc + 2))) + if (get_user(instr_le, (__le16 __user *)(pc + 2))) goto exit; - instr2 = le16_to_cpu(instr2); + instr2 = le16_to_cpu(instr_le); instr = (instr << 16) | instr2; } } else { /* 32-bit ARM instruction */ - if (get_user(instr, (u32 __user *)pc)) + __le32 instr_le; + if (get_user(instr_le, (__le32 __user *)pc)) goto exit; - instr = le32_to_cpu(instr); + instr = le32_to_cpu(instr_le); } raw_spin_lock_irqsave(&undef_lock, flags); -- cgit v1.2.3-59-g8ed1b From 65de142143206c7ffd98b0fcb062a79b3c6f1934 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:58:09 +0200 Subject: arm64: fix endianness annotation in aarch64_insn_read() The function arch64_insn_read() is used to read an instruction. On AM64 instructions are always stored in little-endian order and thus the function correctly do a little-to-native endian conversion to the value just read. 
However, the variable used to hold the value before the conversion is not declared for a little-endian value but for a native one. Fix this by using the correct type for the declaration: __le32 Note: This only works because the function reading the value, probe_kernel_read((), takes a void pointer and void pointers are endian-agnostic. Otherwise probe_kernel_read() should also be properly annotated (or worse, need to be specialized). Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index b884a926a632..d4d80b32cb69 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -117,7 +117,7 @@ static void __kprobes patch_unmap(int fixmap) int __kprobes aarch64_insn_read(void *addr, u32 *insnp) { int ret; - u32 val; + __le32 val; ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); if (!ret) -- cgit v1.2.3-59-g8ed1b From 57c138357d5922878b3bc5207bd59b8512ee80e6 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:58:11 +0200 Subject: arm64: fix endianness annotation for aarch64_insn_write() aarch64_insn_write() is used to write an instruction. As on ARM64 in-memory instructions are always stored in little-endian order, this function, taking the instruction opcode in native order, correctly convert it to little-endian before sending it to an helper function __aarch64_insn_write() which will do the effective write. This is all good, but the variable and argument holding the converted value are not annotated for a little-endian value but left for native values. Fix this by adjusting the prototype of the helper and directly using the result of cpu_to_le32() without passing by an intermediate variable (which was not a distinct one but the same as the one holding the native value). Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index d4d80b32cb69..60a2f03d76bc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -126,7 +126,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) return ret; } -static int __kprobes __aarch64_insn_write(void *addr, u32 insn) +static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) { void *waddr = addr; unsigned long flags = 0; @@ -145,8 +145,7 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn) int __kprobes aarch64_insn_write(void *addr, u32 insn) { - insn = cpu_to_le32(insn); - return __aarch64_insn_write(addr, insn); + return __aarch64_insn_write(addr, cpu_to_le32(insn)); } static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) -- cgit v1.2.3-59-g8ed1b From 02129ae5fea83294b45c8f16c4ff14ae94e6858d Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:56:00 +0200 Subject: arm64: fix endianness annotation for reloc_insn_movw() & reloc_insn_imm() Here the functions reloc_insn_movw() & reloc_insn_imm() are used to read, modify and write back ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to/from native order but the pointers used to hold their addresses are declared as for native order values. Fix this by declaring the pointers as __le32* and remove the casts that are now unneeded. 
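Both helpers now express the read-modify-write of an instruction with the byte order visible in the types (sketch; place points at the instruction being relocated):

	__le32 *place;		/* in-memory instructions are always LE */
	u32 insn;

	insn = le32_to_cpu(*place);		/* LE -> native */
	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
	*place = cpu_to_le32(insn);		/* native -> LE */
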
Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/module.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 8c3a7264fb0f..f469e0435903 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -74,7 +74,7 @@ enum aarch64_reloc_op { RELOC_OP_PAGE, }; -static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) +static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val) { switch (reloc_op) { case RELOC_OP_ABS: @@ -121,12 +121,12 @@ enum aarch64_insn_movw_imm_type { AARCH64_INSN_IMM_MOVKZ, }; -static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, +static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, int lsb, enum aarch64_insn_movw_imm_type imm_type) { u64 imm; s64 sval; - u32 insn = le32_to_cpu(*(u32 *)place); + u32 insn = le32_to_cpu(*place); sval = do_reloc(op, place, val); imm = sval >> lsb; @@ -154,7 +154,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, /* Update the instruction with the new encoding. */ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); - *(u32 *)place = cpu_to_le32(insn); + *place = cpu_to_le32(insn); if (imm > U16_MAX) return -ERANGE; @@ -162,12 +162,12 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, return 0; } -static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, +static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, int lsb, int len, enum aarch64_insn_imm_type imm_type) { u64 imm, imm_mask; s64 sval; - u32 insn = le32_to_cpu(*(u32 *)place); + u32 insn = le32_to_cpu(*place); /* Calculate the relocation value. */ sval = do_reloc(op, place, val); @@ -179,7 +179,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, /* Update the instruction's immediate field. */ insn = aarch64_insn_encode_immediate(imm_type, insn, imm); - *(u32 *)place = cpu_to_le32(insn); + *place = cpu_to_le32(insn); /* * Extract the upper value bits (including the sign bit) and -- cgit v1.2.3-59-g8ed1b From c0d109de4c0ca365a2bd180e2e65501196fa8ef4 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Mon, 26 Jun 2017 15:16:25 +0200 Subject: arm64: use readq() instead of readl() to read 64bit entry_point Here the entrypoint, declared as a 64 bit integer, is read from a pointer to 64bit integer but the read is done via readl_relaxed() which is for 32bit quantities. All the high bits will thus be lost which change the meaning of the test against zero done later. Fix this by using readq_relaxed() instead as it should be for 64bit quantities. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi_parking_protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c index 1f5655cd9cc9..f35e80aad378 100644 --- a/arch/arm64/kernel/acpi_parking_protocol.c +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -125,7 +125,7 @@ static void acpi_parking_protocol_cpu_postboot(void) struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox; __le64 entry_point; - entry_point = readl_relaxed(&mailbox->entry_point); + entry_point = readq_relaxed(&mailbox->entry_point); /* * Check if firmware has cleared the entry_point as expected * by the protocol specification. 
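The bug is purely one of access width: readl_relaxed() performs a 32-bit MMIO read, and its result is zero-extended on assignment, so bits [63:32] of the mailbox field were silently dropped before the later comparison against zero (sketch):

	u64 entry_point;

	entry_point = readl_relaxed(&mailbox->entry_point);	/* bug: high half lost */
	entry_point = readq_relaxed(&mailbox->entry_point);	/* fix: one 64-bit read */
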
-- cgit v1.2.3-59-g8ed1b From f0cda7e6dc5893b4b4daa3440512fc1226bc983f Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:58:07 +0200 Subject: arm64: fix endianness annotation in acpi_parking_protocol.c Here both variables 'cpu_id' and 'entry_point' are read via read[lq]_relaxed(), from a little-endian annotated pointer and then used as a native endian value. This is correct since the read[lq]() family of function internally do a little-to-native endian conversion. But in this case, it is wrong to declare these variable as little-endian since there are native ones. Fix this by changing the declaration of these variables as 'u32' or 'u64' instead of '__le32' / '__le64'. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi_parking_protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c index f35e80aad378..98a20e58758b 100644 --- a/arch/arm64/kernel/acpi_parking_protocol.c +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -71,7 +71,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu) { struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; struct parking_protocol_mailbox __iomem *mailbox; - __le32 cpu_id; + u32 cpu_id; /* * Map mailbox memory with attribute device nGnRE (ie ioremap - @@ -123,7 +123,7 @@ static void acpi_parking_protocol_cpu_postboot(void) int cpu = smp_processor_id(); struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox; - __le64 entry_point; + u64 entry_point; entry_point = readq_relaxed(&mailbox->entry_point); /* -- cgit v1.2.3-59-g8ed1b From 50a4b05609929003ce98987bb901ee10fe21fb20 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 29 Jun 2017 16:31:40 +0200 Subject: arm64: add missing conversion to __wsum in ip_fast_csum() ARM64 implementation of ip_fast_csum() do most of the work in 128 or 64 bit and call csum_fold() to finalize. csum_fold() itself take a __wsum argument, to insure that this value is always a 32bit native-order value. Fix this by adding the sadly needed '__force' to cast the native 'sum' to the type '__wsum'. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index 09f65339d66d..0b6f5a7d4027 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -42,7 +42,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } while (--ihl); sum += ((sum >> 32) | (sum << 32)); - return csum_fold(sum >> 32); + return csum_fold((__force u32)(sum >> 32)); } #define ip_fast_csum ip_fast_csum -- cgit v1.2.3-59-g8ed1b From 67831edf8a826750b43d5612792a9c3bc449f227 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 29 Jun 2017 16:35:29 +0200 Subject: arm64: fix endianness annotation in get_kaslr_seed() In the flattened device tree format, all integer properties are in big-endian order. Here the property "kaslr-seed" is read from the fdt and then correctly converted to native order (via fdt64_to_cpu()) but the pointer used for this is not annotated as being for big-endian. Fix this by declaring the pointer as fdt64_t instead of u64 (fdt64_t being itself typedefed to __be64). 
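A condensed sketch of the corrected read, assuming the surrounding get_kaslr_seed() logic (FDT property cells are always stored big-endian):

	int node, len;
	fdt64_t *prop;			/* big-endian, as stored in the FDT */

	node = fdt_path_offset(fdt, "/chosen");
	if (node < 0)
		return 0;

	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
	if (!prop || len != sizeof(u64))
		return 0;

	return fdt64_to_cpu(*prop);	/* convert to native order */
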
Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index d7e90d97f5c4..a9710efb8c01 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -27,7 +27,7 @@ u16 __initdata memstart_offset_seed; static __init u64 get_kaslr_seed(void *fdt) { int node, len; - u64 *prop; + fdt64_t *prop; u64 ret; node = fdt_path_offset(fdt, "/chosen"); -- cgit v1.2.3-59-g8ed1b From 15ad6ace52039c7e39435c4d712d147126604a97 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 29 Jun 2017 16:40:12 +0200 Subject: arm64: fix endianness annotation for __apply_alternatives()/get_alt_insn() get_alt_insn() is used to read and create ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to/from native order when processed but the pointers used to hold the address of these instructions are declared as for native order values. Fix this by declaring the pointers as __le32* instead of u32* and make the few appropriate needed changes like removing the unneeded cast '(u32*)' in front of __ALT_PTR()'s definition. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8840c109c5d6..6dd0a3a3e5c9 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -28,7 +28,7 @@ #include #include -#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f) +#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) @@ -60,7 +60,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) +static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -109,7 +109,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - u32 *origptr, *replptr, *updptr; + __le32 *origptr, *replptr, *updptr; for (alt = region->begin; alt < region->end; alt++) { u32 insn; @@ -124,7 +124,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) origptr = ALT_ORIG_PTR(alt); replptr = ALT_REPL_PTR(alt); - updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr; + updptr = use_linear_alias ? lm_alias(origptr) : origptr; nr_inst = alt->alt_len / sizeof(insn); for (i = 0; i < nr_inst; i++) { -- cgit v1.2.3-59-g8ed1b From 53b1a742ed251780267a57415bc955bd50f40c3d Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 29 Jun 2017 15:25:47 +0100 Subject: arm64: ptrace: Avoid setting compat FP[SC]R to garbage if get_user fails If get_user() fails when reading the new FPSCR value from userspace in compat_vfp_get(), then garbage* will be written to the task's FPSR and FPCR registers. This patch prevents this by checking the return from get_user() first. [*] Actually, zero, due to the behaviour of get_user() on error, but that's still not what userspace expects. 
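The rule applied is the usual one: check the get_user() return before consuming the destination (sketch of the fixed flow; the missing __user annotation on this cast is itself cleaned up two patches later):

	compat_ulong_t fpscr;

	ret = get_user(fpscr, (compat_ulong_t __user *)ubuf);
	if (!ret) {
		uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
	}
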
 arch/arm64/kernel/ptrace.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 35846f1550db..4c068dcf1977 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -947,8 +947,10 @@ static int compat_vfp_set(struct task_struct *target,
 
 	if (count && !ret) {
 		ret = get_user(fpscr, (compat_ulong_t *)ubuf);
-		uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
-		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+		if (!ret) {
+			uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
+			uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+		}
 	}
 
 	fpsimd_flush_task_state(target);
--
cgit v1.2.3-59-g8ed1b

From 16d38acb12d065ebe3494e4e31e8b4438f3214da Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Thu, 29 Jun 2017 15:25:48 +0100
Subject: arm64: ptrace: Remove redundant overrun check from compat_vfp_set()

compat_vfp_set() checks for userspace trying to write an excessive
amount of data to the regset. However, this check is conspicuous for
its absence from every other _set() in the arm64 ptrace implementation.
In fact, the core ptrace_regset() already clamps userspace's iov_len to
the regset size before the individual regset .{get,set}() methods get
called.

This patch removes the redundant check.

Signed-off-by: Dave Martin
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/ptrace.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 4c068dcf1977..949ab6bdfbad 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -937,9 +937,6 @@ static int compat_vfp_set(struct task_struct *target,
 	compat_ulong_t fpscr;
 	int ret;
 
-	if (pos + count > VFP_STATE_SIZE)
-		return -EIO;
-
 	uregs = &target->thread.fpsimd_state.user_fpsimd;
 
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
--
cgit v1.2.3-59-g8ed1b

From 5fbd5fc49fc39ac8433da62d16682a1d0217ea4f Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Thu, 29 Jun 2017 15:25:49 +0100
Subject: arm64: ptrace: Fix incorrect get_user() use in compat_vfp_set()

Now that compat_vfp_get() uses the regset API to copy the FPSCR value
out to userspace, compat_vfp_set() looks inconsistent. In particular,
compat_vfp_set() will fail if called with kbuf != NULL && ubuf == NULL
(which is valid usage according to the regset API).

This patch fixes compat_vfp_set() to use user_regset_copyin(),
similarly to compat_vfp_get(). This also squashes a sparse warning
triggered by the cast that drops __user when calling get_user().

Signed-off-by: Dave Martin
Signed-off-by: Will Deacon
---
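Note: user_regset_copyin() reads from whichever of kbuf/ubuf is
non-NULL, which is what makes it suitable here. A rough sketch of a
call (bounds names invented):

	/*
	 * Copy bytes [start, end) of the regset payload into &val,
	 * advancing pos/count, from the kernel or user buffer.
	 */
	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 &val, start, end);
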
 arch/arm64/kernel/ptrace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 949ab6bdfbad..1b38c0150aec 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -935,15 +935,17 @@ static int compat_vfp_set(struct task_struct *target,
 {
 	struct user_fpsimd_state *uregs;
 	compat_ulong_t fpscr;
-	int ret;
+	int ret, vregs_end_pos;
 
 	uregs = &target->thread.fpsimd_state.user_fpsimd;
 
+	vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
 	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
-				 VFP_STATE_SIZE - sizeof(compat_ulong_t));
+				 vregs_end_pos);
 
 	if (count && !ret) {
-		ret = get_user(fpscr, (compat_ulong_t *)ubuf);
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpscr,
+					 vregs_end_pos, VFP_STATE_SIZE);
 		if (!ret) {
 			uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
 			uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
--
cgit v1.2.3-59-g8ed1b

From 70a62ad19e40a6b0d9e3a048fe0d0a391d76ad12 Mon Sep 17 00:00:00 2001
From: Arvind Yadav
Date: Fri, 30 Jun 2017 17:00:20 +0530
Subject: arm64: cpuinfo: constify attribute_group structures.

attribute_groups are not supposed to change at runtime. All functions
working with attribute_groups provided by <linux/sysfs.h> work with
const attribute_group. So mark the non-const structs as const.

Signed-off-by: Arvind Yadav
Signed-off-by: Will Deacon
---
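Note: as a generic sketch of the resulting shape (all names invented),
the group can live in rodata and still be passed to the sysfs helpers,
which take a const pointer:

	static struct attribute *example_attrs[] = {
		&example_attr.attr,	/* some previously defined attribute */
		NULL
	};

	static const struct attribute_group example_group = {
		.attrs = example_attrs,
		.name = "example",
	};

	/* sysfs_create_group() takes a const struct attribute_group * */
	err = sysfs_create_group(kobj, &example_group);
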
 arch/arm64/kernel/cpuinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 68b1f364c515..f495ee5049fd 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -227,7 +227,7 @@ static struct attribute *cpuregs_id_attrs[] = {
 	NULL
 };
 
-static struct attribute_group cpuregs_attr_group = {
+static const struct attribute_group cpuregs_attr_group = {
 	.attrs = cpuregs_id_attrs,
 	.name = "identification"
 };
--
cgit v1.2.3-59-g8ed1b

From 425e1ed73e6574e4fe186ec82fd37213cbd47df0 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck
Date: Wed, 28 Jun 2017 16:58:03 +0200
Subject: arm64: fix endianness annotation for 'struct jit_ctx' and friends

struct jit_ctx::image is used to store a pointer to the jitted
instructions, which are always little-endian. These instructions are
thus correctly converted from native order to little-endian before
being stored, but the pointer 'image' is declared as a pointer to
native-order values.

Fix this by declaring the field as __le32 * instead of u32 *. Same for
the pointer used in jit_fill_hole() to initialize the image.

Signed-off-by: Luc Van Oostenryck
Signed-off-by: Will Deacon
---
 arch/arm64/net/bpf_jit_comp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 71f930501ade..bd2ac9912087 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -68,7 +68,7 @@ struct jit_ctx {
 	int idx;
 	int epilogue_offset;
 	int *offset;
-	u32 *image;
+	__le32 *image;
 };
 
 static inline void emit(const u32 insn, struct jit_ctx *ctx)
@@ -128,7 +128,7 @@ static inline int bpf2a64_offset(int bpf_to, int bpf_from,
 
 static void jit_fill_hole(void *area, unsigned int size)
 {
-	u32 *ptr;
+	__le32 *ptr;
 	/* We are guaranteed to have aligned memory. */
 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
 		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
@@ -871,7 +871,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	/* 2. Now, the actual pass. */
 
-	ctx.image = (u32 *)image_ptr;
+	ctx.image = (__le32 *)image_ptr;
 	ctx.idx = 0;
 
 	build_prologue(&ctx);
--
cgit v1.2.3-59-g8ed1b
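Note: as an illustration of the convention the __le32 annotation
captures (sketch only; the helper name is invented), instructions are
built in native order and stored little-endian:

	static inline void emit_one(u32 insn, __le32 *image, int *idx)
	{
		/* AArch64 instructions are always little-endian in memory */
		image[(*idx)++] = cpu_to_le32(insn);
	}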