aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/include/asm/kvm_arm.h10
-rw-r--r--arch/arm/include/asm/system_misc.h5
-rw-r--r--arch/arm/kernel/perf_event_v6.c2
-rw-r--r--arch/arm64/Kconfig9
-rw-r--r--arch/arm64/Makefile7
-rw-r--r--arch/arm64/include/asm/checksum.h2
-rw-r--r--arch/arm64/include/asm/dma-mapping.h2
-rw-r--r--arch/arm64/include/asm/elf.h6
-rw-r--r--arch/arm64/include/asm/esr.h1
-rw-r--r--arch/arm64/include/asm/futex.h8
-rw-r--r--arch/arm64/include/asm/kvm_arm.h10
-rw-r--r--arch/arm64/include/asm/module.h3
-rw-r--r--arch/arm64/include/asm/pgtable.h2
-rw-r--r--arch/arm64/include/asm/processor.h3
-rw-r--r--arch/arm64/include/asm/stacktrace.h1
-rw-r--r--arch/arm64/include/asm/system_misc.h4
-rw-r--r--arch/arm64/include/uapi/asm/sigcontext.h55
-rw-r--r--arch/arm64/kernel/Makefile3
-rw-r--r--arch/arm64/kernel/acpi_parking_protocol.c6
-rw-r--r--arch/arm64/kernel/alternative.c8
-rw-r--r--arch/arm64/kernel/cpufeature.c25
-rw-r--r--arch/arm64/kernel/cpuinfo.c2
-rw-r--r--arch/arm64/kernel/debug-monitors.c14
-rw-r--r--arch/arm64/kernel/ftrace-mod.S18
-rw-r--r--arch/arm64/kernel/ftrace.c103
-rw-r--r--arch/arm64/kernel/insn.c7
-rw-r--r--arch/arm64/kernel/kaslr.c2
-rw-r--r--arch/arm64/kernel/module.c20
-rw-r--r--arch/arm64/kernel/pci.c3
-rw-r--r--arch/arm64/kernel/perf_event.c2
-rw-r--r--arch/arm64/kernel/probes/kprobes.c4
-rw-r--r--arch/arm64/kernel/process.c9
-rw-r--r--arch/arm64/kernel/ptrace.c39
-rw-r--r--arch/arm64/kernel/setup.c3
-rw-r--r--arch/arm64/kernel/signal.c413
-rw-r--r--arch/arm64/kernel/stacktrace.c1
-rw-r--r--arch/arm64/kernel/traps.c18
-rw-r--r--arch/arm64/kernel/vdso.c10
-rw-r--r--arch/arm64/mm/dma-mapping.c9
-rw-r--r--arch/arm64/mm/fault.c215
-rw-r--r--arch/arm64/mm/hugetlbpage.c29
-rw-r--r--arch/arm64/mm/mmap.c19
-rw-r--r--arch/arm64/mm/mmu.c1
-rw-r--r--arch/arm64/net/bpf_jit_comp.c6
-rw-r--r--drivers/acpi/apei/Kconfig15
-rw-r--r--drivers/acpi/apei/ghes.c206
-rw-r--r--drivers/acpi/apei/hest.c7
-rw-r--r--drivers/acpi/arm64/iort.c15
-rw-r--r--drivers/char/Kconfig2
-rw-r--r--drivers/firmware/efi/cper.c204
-rw-r--r--drivers/irqchip/Kconfig1
-rw-r--r--drivers/perf/Kconfig9
-rw-r--r--drivers/perf/xgene_pmu.c684
-rw-r--r--drivers/ras/ras.c16
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--include/acpi/ghes.h48
-rw-r--r--include/linux/acpi_iort.h2
-rw-r--r--include/linux/cper.h54
-rw-r--r--include/linux/ras.h17
-rw-r--r--include/linux/uuid.h4
-rw-r--r--include/ras/ras_event.h90
-rw-r--r--virt/kvm/arm/mmu.c36
62 files changed, 2190 insertions, 341 deletions
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index a3f0b3d50089..ebf020b02bc8 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -187,6 +187,16 @@
#define FSC_FAULT (0x04)
#define FSC_ACCESS (0x08)
#define FSC_PERM (0x0c)
+#define FSC_SEA (0x10)
+#define FSC_SEA_TTW0 (0x14)
+#define FSC_SEA_TTW1 (0x15)
+#define FSC_SEA_TTW2 (0x16)
+#define FSC_SEA_TTW3 (0x17)
+#define FSC_SECC (0x18)
+#define FSC_SECC_TTW0 (0x1c)
+#define FSC_SECC_TTW1 (0x1d)
+#define FSC_SECC_TTW2 (0x1e)
+#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~0xf)
diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h
index a3d61ad984af..8c4a89f5ce7d 100644
--- a/arch/arm/include/asm/system_misc.h
+++ b/arch/arm/include/asm/system_misc.h
@@ -22,6 +22,11 @@ extern void (*arm_pm_idle)(void);
extern unsigned int user_debug;
+static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr)
+{
+ return -1;
+}
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_ARM_SYSTEM_MISC_H */
diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c
index 96b7a477a8db..8226d0b71fd3 100644
--- a/arch/arm/kernel/perf_event_v6.c
+++ b/arch/arm/kernel/perf_event_v6.c
@@ -552,7 +552,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
-static struct of_device_id armv6_pmu_of_device_ids[] = {
+static const struct of_device_id armv6_pmu_of_device_ids[] = {
{.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
{.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
{.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 300146dc8433..9f7a934ff707 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -3,6 +3,7 @@ config ARM64
select ACPI_CCA_REQUIRED if ACPI
select ACPI_GENERIC_GSI if ACPI
select ACPI_GTDT if ACPI
+ select ACPI_IORT if ACPI
select ACPI_REDUCED_HARDWARE_ONLY if ACPI
select ACPI_MCFG if ACPI
select ACPI_SPCR_TABLE if ACPI
@@ -19,7 +20,9 @@ config ARM64
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
@@ -93,6 +96,7 @@ config ARM64
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP if NUMA
+ select HAVE_NMI if ACPI_APEI_SEA
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -245,6 +249,9 @@ config PGTABLE_LEVELS
config ARCH_SUPPORTS_UPROBES
def_bool y
+config ARCH_PROC_KCORE_TEXT
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -983,7 +990,7 @@ config RANDOMIZE_BASE
config RANDOMIZE_MODULE_REGION_FULL
bool "Randomize the module region independently from the core kernel"
- depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE
+ depends on RANDOMIZE_BASE
default y
help
Randomizes the location of the module region without considering the
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index f839ecd919f9..9b41f1e3b1a0 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -52,17 +52,19 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
+CHECKFLAGS += -D__AARCH64EB__
AS += -EB
LD += -EB
UTS_MACHINE := aarch64_be
else
KBUILD_CPPFLAGS += -mlittle-endian
+CHECKFLAGS += -D__AARCH64EL__
AS += -EL
LD += -EL
UTS_MACHINE := aarch64
endif
-CHECKFLAGS += -D__aarch64__
+CHECKFLAGS += -D__aarch64__ -m64
ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
KBUILD_CFLAGS_MODULE += -mcmodel=large
@@ -70,6 +72,9 @@ endif
ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds
+ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o
+endif
endif
# Default value
diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h
index 09f65339d66d..0b6f5a7d4027 100644
--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -42,7 +42,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
} while (--ihl);
sum += ((sum >> 32) | (sum << 32));
- return csum_fold(sum >> 32);
+ return csum_fold((__force u32)(sum >> 32));
}
#define ip_fast_csum ip_fast_csum
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 5392dbeffa45..f72779aad276 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -48,8 +48,6 @@ void arch_teardown_dma_ops(struct device *dev);
/* do not use this function in a driver */
static inline bool is_device_dma_coherent(struct device *dev)
{
- if (!dev)
- return false;
return dev->archdata.dma_coherent;
}
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 5d1700425efe..ac3fb7441510 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -142,6 +142,7 @@ typedef struct user_fpsimd_state elf_fpregset_t;
({ \
clear_bit(TIF_32BIT, &current->mm->context.flags); \
clear_thread_flag(TIF_32BIT); \
+ current->personality &= ~READ_IMPLIES_EXEC; \
})
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
@@ -187,6 +188,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
((x)->e_flags & EF_ARM_EABI_MASK))
#define compat_start_thread compat_start_thread
+/*
+ * Unlike the native SET_PERSONALITY macro, the compat version inherits
+ * READ_IMPLIES_EXEC across a fork() since this is the behaviour on
+ * arch/arm/.
+ */
#define COMPAT_SET_PERSONALITY(ex) \
({ \
set_bit(TIF_32BIT, &current->mm->context.flags); \
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 85997c0e5443..28bf02efce76 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -83,6 +83,7 @@
#define ESR_ELx_WNR (UL(1) << 6)
/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_FnV (UL(1) << 10)
#define ESR_ELx_EA (UL(1) << 9)
#define ESR_ELx_S1PTW (UL(1) << 7)
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 85c4a8981d47..f32b42e8725d 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -48,16 +48,16 @@ do { \
} while (0)
static inline int
-futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
+ int oparg = (int)(encoded_op << 8) >> 20;
+ int cmparg = (int)(encoded_op << 20) >> 20;
int oldval = 0, ret, tmp;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
+ oparg = 1U << (oparg & 0x1f);
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 6e99978e83bd..61d694c2eae5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -204,6 +204,16 @@
#define FSC_FAULT ESR_ELx_FSC_FAULT
#define FSC_ACCESS ESR_ELx_FSC_ACCESS
#define FSC_PERM ESR_ELx_FSC_PERM
+#define FSC_SEA ESR_ELx_FSC_EXTABT
+#define FSC_SEA_TTW0 (0x14)
+#define FSC_SEA_TTW1 (0x15)
+#define FSC_SEA_TTW2 (0x16)
+#define FSC_SEA_TTW3 (0x17)
+#define FSC_SECC (0x18)
+#define FSC_SECC_TTW0 (0x1c)
+#define FSC_SECC_TTW1 (0x1d)
+#define FSC_SECC_TTW2 (0x1e)
+#define FSC_SECC_TTW3 (0x1f)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index d57693f5d4ec..19bd97671bb8 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -30,6 +30,9 @@ struct mod_plt_sec {
struct mod_arch_specific {
struct mod_plt_sec core;
struct mod_plt_sec init;
+
+ /* for CONFIG_DYNAMIC_FTRACE */
+ void *ftrace_trampoline;
};
#endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c213fdbd056c..6eae342ced6b 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
#define pud_none(pud) (!pud_val(pud))
#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
-#define pud_present(pud) (pud_val(pud))
+#define pud_present(pud) pte_present(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
{
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9428b93fefb2..64c9e78f9882 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -104,6 +104,9 @@ struct thread_struct {
#define task_user_tls(t) (&(t)->thread.tp_value)
#endif
+/* Sync TPIDR_EL0 back to thread_struct for current */
+void tls_preserve_current_state(void);
+
#define INIT_THREAD { }
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 801a16dbbdf6..5b6eafccc5d8 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -30,5 +30,6 @@ struct stackframe {
extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index bc812435bc76..07aa8e3c5630 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -40,7 +40,7 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
int sig, int code, const char *name);
struct mm_struct;
-extern void show_pte(struct mm_struct *mm, unsigned long addr);
+extern void show_pte(unsigned long addr);
extern void __show_regs(struct pt_regs *);
extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
@@ -56,6 +56,8 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
__show_ratelimited; \
})
+int handle_guest_sea(phys_addr_t addr, unsigned int esr);
+
#endif /* __ASSEMBLY__ */
#endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h
index ee469be1ae1d..f0a76b9fcd6e 100644
--- a/arch/arm64/include/uapi/asm/sigcontext.h
+++ b/arch/arm64/include/uapi/asm/sigcontext.h
@@ -34,6 +34,26 @@ struct sigcontext {
};
/*
+ * Allocation of __reserved[]:
+ * (Note: records do not necessarily occur in the order shown here.)
+ *
+ * size description
+ *
+ * 0x210 fpsimd_context
+ * 0x10 esr_context
+ * 0x20 extra_context (optional)
+ * 0x10 terminator (null _aarch64_ctx)
+ *
+ * 0xdb0 (reserved for future allocation)
+ *
+ * New records that can exceed this space need to be opt-in for userspace, so
+ * that an expanded signal frame is not generated unexpectedly. The mechanism
+ * for opting in will depend on the extension that generates each new record.
+ * The above table documents the maximum set and sizes of records than can be
+ * generated when userspace does not opt in for any such extension.
+ */
+
+/*
* Header to be used at the beginning of structures extending the user
* context. Such structures must be placed after the rt_sigframe on the stack
* and be 16-byte aligned. The last structure must be a dummy one with the
@@ -61,4 +81,39 @@ struct esr_context {
__u64 esr;
};
+/*
+ * extra_context: describes extra space in the signal frame for
+ * additional structures that don't fit in sigcontext.__reserved[].
+ *
+ * Note:
+ *
+ * 1) fpsimd_context, esr_context and extra_context must be placed in
+ * sigcontext.__reserved[] if present. They cannot be placed in the
+ * extra space. Any other record can be placed either in the extra
+ * space or in sigcontext.__reserved[], unless otherwise specified in
+ * this file.
+ *
+ * 2) There must not be more than one extra_context.
+ *
+ * 3) If extra_context is present, it must be followed immediately in
+ * sigcontext.__reserved[] by the terminating null _aarch64_ctx.
+ *
+ * 4) The extra space to which datap points must start at the first
+ * 16-byte aligned address immediately after the terminating null
+ * _aarch64_ctx that follows the extra_context structure in
+ * __reserved[]. The extra space may overrun the end of __reserved[],
+ * as indicated by a sufficiently large value for the size field.
+ *
+ * 5) The extra space must itself be terminated with a null
+ * _aarch64_ctx.
+ */
+#define EXTRA_MAGIC 0x45585401
+
+struct extra_context {
+ struct _aarch64_ctx head;
+ __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */
+ __u32 size; /* size in bytes of the extra space */
+ __u32 __reserved[3];
+};
+
#endif /* _UAPI__ASM_SIGCONTEXT_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 1dcb69d3d0e5..f2b4e816b6de 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -62,3 +62,6 @@ extra-y += $(head-y) vmlinux.lds
ifeq ($(CONFIG_DEBUG_EFI),y)
AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
endif
+
+# will be included by each individual module but not by the core kernel itself
+extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
index 1f5655cd9cc9..98a20e58758b 100644
--- a/arch/arm64/kernel/acpi_parking_protocol.c
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -71,7 +71,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
{
struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
struct parking_protocol_mailbox __iomem *mailbox;
- __le32 cpu_id;
+ u32 cpu_id;
/*
* Map mailbox memory with attribute device nGnRE (ie ioremap -
@@ -123,9 +123,9 @@ static void acpi_parking_protocol_cpu_postboot(void)
int cpu = smp_processor_id();
struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox;
- __le64 entry_point;
+ u64 entry_point;
- entry_point = readl_relaxed(&mailbox->entry_point);
+ entry_point = readq_relaxed(&mailbox->entry_point);
/*
* Check if firmware has cleared the entry_point as expected
* by the protocol specification.
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 8840c109c5d6..6dd0a3a3e5c9 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -28,7 +28,7 @@
#include <asm/sections.h>
#include <linux/stop_machine.h>
-#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f)
+#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
@@ -60,7 +60,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
#define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
-static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
+static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
u32 insn;
@@ -109,7 +109,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
- u32 *origptr, *replptr, *updptr;
+ __le32 *origptr, *replptr, *updptr;
for (alt = region->begin; alt < region->end; alt++) {
u32 insn;
@@ -124,7 +124,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
origptr = ALT_ORIG_PTR(alt);
replptr = ALT_REPL_PTR(alt);
- updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr;
+ updptr = use_linear_alias ? lm_alias(origptr) : origptr;
nr_inst = alt->alt_len / sizeof(insn);
for (i = 0; i < nr_inst; i++) {
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 817ce3365e20..9f9e0064c8c1 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -51,6 +51,25 @@ unsigned int compat_elf_hwcap2 __read_mostly;
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcaps);
+static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p)
+{
+ /* file-wide pr_fmt adds "CPU features: " prefix */
+ pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps);
+ return 0;
+}
+
+static struct notifier_block cpu_hwcaps_notifier = {
+ .notifier_call = dump_cpu_hwcaps
+};
+
+static int __init register_cpu_hwcaps_dumper(void)
+{
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &cpu_hwcaps_notifier);
+ return 0;
+}
+__initcall(register_cpu_hwcaps_dumper);
+
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
@@ -639,8 +658,10 @@ void update_cpu_features(int cpu,
* Mismatched CPU features are a recipe for disaster. Don't even
* pretend to support them.
*/
- WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
- "Unsupported CPU feature variation.\n");
+ if (taint) {
+ pr_warn_once("Unsupported CPU feature variation detected.\n");
+ add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+ }
}
u64 read_sanitised_ftr_reg(u32 id)
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 68b1f364c515..f495ee5049fd 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -227,7 +227,7 @@ static struct attribute *cpuregs_id_attrs[] = {
NULL
};
-static struct attribute_group cpuregs_attr_group = {
+static const struct attribute_group cpuregs_attr_group = {
.attrs = cpuregs_id_attrs,
.name = "identification"
};
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index d618e25c3de1..c7ef99904934 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -341,20 +341,22 @@ int aarch32_break_handler(struct pt_regs *regs)
if (compat_thumb_mode(regs)) {
/* get 16-bit Thumb instruction */
- get_user(thumb_instr, (u16 __user *)pc);
- thumb_instr = le16_to_cpu(thumb_instr);
+ __le16 instr;
+ get_user(instr, (__le16 __user *)pc);
+ thumb_instr = le16_to_cpu(instr);
if (thumb_instr == AARCH32_BREAK_THUMB2_LO) {
/* get second half of 32-bit Thumb-2 instruction */
- get_user(thumb_instr, (u16 __user *)(pc + 2));
- thumb_instr = le16_to_cpu(thumb_instr);
+ get_user(instr, (__le16 __user *)(pc + 2));
+ thumb_instr = le16_to_cpu(instr);
bp = thumb_instr == AARCH32_BREAK_THUMB2_HI;
} else {
bp = thumb_instr == AARCH32_BREAK_THUMB;
}
} else {
/* 32-bit ARM instruction */
- get_user(arm_instr, (u32 __user *)pc);
- arm_instr = le32_to_cpu(arm_instr);
+ __le32 instr;
+ get_user(instr, (__le32 __user *)pc);
+ arm_instr = le32_to_cpu(instr);
bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM;
}
diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S
new file mode 100644
index 000000000000..00c4025be4ff
--- /dev/null
+++ b/arch/arm64/kernel/ftrace-mod.S
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .section ".text.ftrace_trampoline", "ax"
+ .align 3
+0: .quad 0
+__ftrace_trampoline:
+ ldr x16, 0b
+ br x16
+ENDPROC(__ftrace_trampoline)
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 40ad08ac569a..c13b1fca0e5b 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -10,10 +10,12 @@
*/
#include <linux/ftrace.h>
+#include <linux/module.h>
#include <linux/swab.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
+#include <asm/debug-monitors.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
@@ -70,6 +72,58 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long pc = rec->ip;
u32 old, new;
+ long offset = (long)pc - (long)addr;
+
+ if (offset < -SZ_128M || offset >= SZ_128M) {
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ unsigned long *trampoline;
+ struct module *mod;
+
+ /*
+ * On kernels that support module PLTs, the offset between the
+ * branch instruction and its target may legally exceed the
+ * range of an ordinary relative 'bl' opcode. In this case, we
+ * need to branch via a trampoline in the module.
+ *
+ * NOTE: __module_text_address() must be called with preemption
+ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * retains its validity throughout the remainder of this code.
+ */
+ preempt_disable();
+ mod = __module_text_address(pc);
+ preempt_enable();
+
+ if (WARN_ON(!mod))
+ return -EINVAL;
+
+ /*
+ * There is only one ftrace trampoline per module. For now,
+ * this is not a problem since on arm64, all dynamic ftrace
+ * invocations are routed via ftrace_caller(). This will need
+ * to be revisited if support for multiple ftrace entry points
+ * is added in the future, but for now, the pr_err() below
+ * deals with a theoretical issue only.
+ */
+ trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
+ if (trampoline[0] != addr) {
+ if (trampoline[0] != 0) {
+ pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
+ return -EINVAL;
+ }
+
+ /* point the trampoline to our ftrace entry point */
+ module_disable_ro(mod);
+ trampoline[0] = addr;
+ module_enable_ro(mod, true);
+
+ /* update trampoline before patching in the branch */
+ smp_wmb();
+ }
+ addr = (unsigned long)&trampoline[1];
+#else /* CONFIG_ARM64_MODULE_PLTS */
+ return -EINVAL;
+#endif /* CONFIG_ARM64_MODULE_PLTS */
+ }
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
@@ -84,12 +138,55 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
unsigned long pc = rec->ip;
- u32 old, new;
+ bool validate = true;
+ u32 old = 0, new;
+ long offset = (long)pc - (long)addr;
+
+ if (offset < -SZ_128M || offset >= SZ_128M) {
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ u32 replaced;
+
+ /*
+ * 'mod' is only set at module load time, but if we end up
+ * dealing with an out-of-range condition, we can assume it
+ * is due to a module being loaded far away from the kernel.
+ */
+ if (!mod) {
+ preempt_disable();
+ mod = __module_text_address(pc);
+ preempt_enable();
+
+ if (WARN_ON(!mod))
+ return -EINVAL;
+ }
+
+ /*
+ * The instruction we are about to patch may be a branch and
+ * link instruction that was redirected via a PLT entry. In
+ * this case, the normal validation will fail, but we can at
+ * least check that we are dealing with a branch and link
+ * instruction that points into the right module.
+ */
+ if (aarch64_insn_read((void *)pc, &replaced))
+ return -EFAULT;
+
+ if (!aarch64_insn_is_bl(replaced) ||
+ !within_module(pc + aarch64_get_branch_offset(replaced),
+ mod))
+ return -EINVAL;
+
+ validate = false;
+#else /* CONFIG_ARM64_MODULE_PLTS */
+ return -EINVAL;
+#endif /* CONFIG_ARM64_MODULE_PLTS */
+ } else {
+ old = aarch64_insn_gen_branch_imm(pc, addr,
+ AARCH64_INSN_BRANCH_LINK);
+ }
- old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_nop();
- return ftrace_modify_code(pc, old, new, true);
+ return ftrace_modify_code(pc, old, new, validate);
}
void arch_ftrace_update_code(int command)
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index cd872133e88e..2718a77da165 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -117,7 +117,7 @@ static void __kprobes patch_unmap(int fixmap)
int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
{
int ret;
- u32 val;
+ __le32 val;
ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE);
if (!ret)
@@ -126,7 +126,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
return ret;
}
-static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
+static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
{
void *waddr = addr;
unsigned long flags = 0;
@@ -145,8 +145,7 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn)
int __kprobes aarch64_insn_write(void *addr, u32 insn)
{
- insn = cpu_to_le32(insn);
- return __aarch64_insn_write(addr, insn);
+ return __aarch64_insn_write(addr, cpu_to_le32(insn));
}
static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index d7e90d97f5c4..a9710efb8c01 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -27,7 +27,7 @@ u16 __initdata memstart_offset_seed;
static __init u64 get_kaslr_seed(void *fdt)
{
int node, len;
- u64 *prop;
+ fdt64_t *prop;
u64 ret;
node = fdt_path_offset(fdt, "/chosen");
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f035ff6fb223..f469e0435903 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -74,7 +74,7 @@ enum aarch64_reloc_op {
RELOC_OP_PAGE,
};
-static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val)
+static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val)
{
switch (reloc_op) {
case RELOC_OP_ABS:
@@ -121,12 +121,12 @@ enum aarch64_insn_movw_imm_type {
AARCH64_INSN_IMM_MOVKZ,
};
-static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
+static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val,
int lsb, enum aarch64_insn_movw_imm_type imm_type)
{
u64 imm;
s64 sval;
- u32 insn = le32_to_cpu(*(u32 *)place);
+ u32 insn = le32_to_cpu(*place);
sval = do_reloc(op, place, val);
imm = sval >> lsb;
@@ -154,7 +154,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
/* Update the instruction with the new encoding. */
insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
- *(u32 *)place = cpu_to_le32(insn);
+ *place = cpu_to_le32(insn);
if (imm > U16_MAX)
return -ERANGE;
@@ -162,12 +162,12 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
return 0;
}
-static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
+static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
int lsb, int len, enum aarch64_insn_imm_type imm_type)
{
u64 imm, imm_mask;
s64 sval;
- u32 insn = le32_to_cpu(*(u32 *)place);
+ u32 insn = le32_to_cpu(*place);
/* Calculate the relocation value. */
sval = do_reloc(op, place, val);
@@ -179,7 +179,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
/* Update the instruction's immediate field. */
insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
- *(u32 *)place = cpu_to_le32(insn);
+ *place = cpu_to_le32(insn);
/*
* Extract the upper value bits (including the sign bit) and
@@ -420,8 +420,12 @@ int module_finalize(const Elf_Ehdr *hdr,
for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) {
apply_alternatives((void *)s->sh_addr, s->sh_size);
- return 0;
}
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+ !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))
+ me->arch.ftrace_trampoline = (void *)s->sh_addr;
+#endif
}
return 0;
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
index c7e3e6387a49..a7f6c01c13b9 100644
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -108,7 +108,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
if (!acpi_disabled) {
struct pci_config_window *cfg = bridge->bus->sysdata;
struct acpi_device *adev = to_acpi_device(cfg->parent);
+ struct device *bus_dev = &bridge->bus->dev;
+
ACPI_COMPANION_SET(&bridge->dev, adev);
+ set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev)));
}
return 0;
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 83a1b1ad189f..b5798ba21189 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -529,7 +529,7 @@ static struct attribute_group armv8_pmuv3_events_attr_group = {
.is_visible = armv8pmu_event_attr_is_visible,
};
-PMU_FORMAT_ATTR(event, "config:0-9");
+PMU_FORMAT_ATTR(event, "config:0-15");
static struct attribute *armv8_pmuv3_format_attrs[] = {
&format_attr_event.attr,
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c5c45942fb6e..d849d9804011 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -522,9 +522,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
pr_err("current sp %lx does not match saved sp %lx\n",
orig_sp, stack_addr);
pr_err("Saved registers for jprobe %p\n", jp);
- show_regs(saved_regs);
+ __show_regs(saved_regs);
pr_err("Current registers\n");
- show_regs(regs);
+ __show_regs(regs);
BUG();
}
unpause_graph_tracing();
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ae2a835898d7..659ae8094ed5 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -210,6 +210,7 @@ void __show_regs(struct pt_regs *regs)
void show_regs(struct pt_regs * regs)
{
__show_regs(regs);
+ dump_backtrace(regs, NULL);
}
static void tls_thread_flush(void)
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
return 0;
}
+void tls_preserve_current_state(void)
+{
+ *task_user_tls(current) = read_sysreg(tpidr_el0);
+}
+
static void tls_thread_switch(struct task_struct *next)
{
unsigned long tpidr, tpidrro;
- tpidr = read_sysreg(tpidr_el0);
- *task_user_tls(current) = tpidr;
+ tls_preserve_current_state();
tpidr = *task_user_tls(next);
tpidrro = is_compat_thread(task_thread_info(next)) ?
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index c142459a88f3..1b38c0150aec 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -623,6 +623,10 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
{
struct user_fpsimd_state *uregs;
uregs = &target->thread.fpsimd_state.user_fpsimd;
+
+ if (target == current)
+ fpsimd_preserve_current_state();
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1);
}
@@ -648,6 +652,10 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset,
void *kbuf, void __user *ubuf)
{
unsigned long *tls = &target->thread.tp_value;
+
+ if (target == current)
+ tls_preserve_current_state();
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1);
}
@@ -894,21 +902,27 @@ static int compat_vfp_get(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
- int ret;
+ int ret, vregs_end_pos;
uregs = &target->thread.fpsimd_state.user_fpsimd;
+ if (target == current)
+ fpsimd_preserve_current_state();
+
/*
* The VFP registers are packed into the fpsimd_state, so they all sit
* nicely together for us. We just need to create the fpscr separately.
*/
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0,
- VFP_STATE_SIZE - sizeof(compat_ulong_t));
+ vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs,
+ 0, vregs_end_pos);
if (count && !ret) {
fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) |
(uregs->fpcr & VFP_FPSCR_CTRL_MASK);
- ret = put_user(fpscr, (compat_ulong_t *)ubuf);
+
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr,
+ vregs_end_pos, VFP_STATE_SIZE);
}
return ret;
@@ -921,20 +935,21 @@ static int compat_vfp_set(struct task_struct *target,
{
struct user_fpsimd_state *uregs;
compat_ulong_t fpscr;
- int ret;
-
- if (pos + count > VFP_STATE_SIZE)
- return -EIO;
+ int ret, vregs_end_pos;
uregs = &target->thread.fpsimd_state.user_fpsimd;
+ vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0,
- VFP_STATE_SIZE - sizeof(compat_ulong_t));
+ vregs_end_pos);
if (count && !ret) {
- ret = get_user(fpscr, (compat_ulong_t *)ubuf);
- uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
- uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpscr,
+ vregs_end_pos, VFP_STATE_SIZE);
+ if (!ret) {
+ uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
+ uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
+ }
}
fpsimd_flush_task_state(target);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 2c822ef94f34..d4b740538ad5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -194,6 +194,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys)
}
name = of_flat_dt_get_machine_name();
+ if (!name)
+ return;
+
pr_info("Machine model: %s\n", name);
dump_stack_set_arch_desc("%s (DT)", name);
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index c7b6de62f9d3..089c3747995d 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -19,10 +19,14 @@
#include <linux/compat.h>
#include <linux/errno.h>
+#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/personality.h>
#include <linux/freezer.h>
+#include <linux/stddef.h>
#include <linux/uaccess.h>
+#include <linux/sizes.h>
+#include <linux/string.h>
#include <linux/tracehook.h>
#include <linux/ratelimit.h>
@@ -41,10 +45,133 @@
struct rt_sigframe {
struct siginfo info;
struct ucontext uc;
+};
+
+struct frame_record {
u64 fp;
u64 lr;
};
+struct rt_sigframe_user_layout {
+ struct rt_sigframe __user *sigframe;
+ struct frame_record __user *next_frame;
+
+ unsigned long size; /* size of allocated sigframe data */
+ unsigned long limit; /* largest allowed size */
+
+ unsigned long fpsimd_offset;
+ unsigned long esr_offset;
+ unsigned long extra_offset;
+ unsigned long end_offset;
+};
+
+#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16)
+#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16)
+#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16)
+
+static void init_user_layout(struct rt_sigframe_user_layout *user)
+{
+ const size_t reserved_size =
+ sizeof(user->sigframe->uc.uc_mcontext.__reserved);
+
+ memset(user, 0, sizeof(*user));
+ user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved);
+
+ user->limit = user->size + reserved_size;
+
+ user->limit -= TERMINATOR_SIZE;
+ user->limit -= EXTRA_CONTEXT_SIZE;
+ /* Reserve space for extension and terminator ^ */
+}
+
+static size_t sigframe_size(struct rt_sigframe_user_layout const *user)
+{
+ return round_up(max(user->size, sizeof(struct rt_sigframe)), 16);
+}
+
+/*
+ * Sanity limit on the approximate maximum size of signal frame we'll
+ * try to generate. Stack alignment padding and the frame record are
+ * not taken into account. This limit is not a guarantee and is
+ * NOT ABI.
+ */
+#define SIGFRAME_MAXSZ SZ_64K
+
+static int __sigframe_alloc(struct rt_sigframe_user_layout *user,
+ unsigned long *offset, size_t size, bool extend)
+{
+ size_t padded_size = round_up(size, 16);
+
+ if (padded_size > user->limit - user->size &&
+ !user->extra_offset &&
+ extend) {
+ int ret;
+
+ user->limit += EXTRA_CONTEXT_SIZE;
+ ret = __sigframe_alloc(user, &user->extra_offset,
+ sizeof(struct extra_context), false);
+ if (ret) {
+ user->limit -= EXTRA_CONTEXT_SIZE;
+ return ret;
+ }
+
+ /* Reserve space for the __reserved[] terminator */
+ user->size += TERMINATOR_SIZE;
+
+ /*
+ * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for
+ * the terminator:
+ */
+ user->limit = SIGFRAME_MAXSZ - TERMINATOR_SIZE;
+ }
+
+ /* Still not enough space? Bad luck! */
+ if (padded_size > user->limit - user->size)
+ return -ENOMEM;
+
+ *offset = user->size;
+ user->size += padded_size;
+
+ return 0;
+}
+
+/*
+ * Allocate space for an optional record of <size> bytes in the user
+ * signal frame. The offset from the signal frame base address to the
+ * allocated block is assigned to *offset.
+ */
+static int sigframe_alloc(struct rt_sigframe_user_layout *user,
+ unsigned long *offset, size_t size)
+{
+ return __sigframe_alloc(user, offset, size, true);
+}
+
+/* Allocate the null terminator record and prevent further allocations */
+static int sigframe_alloc_end(struct rt_sigframe_user_layout *user)
+{
+ int ret;
+
+ /* Un-reserve the space reserved for the terminator: */
+ user->limit += TERMINATOR_SIZE;
+
+ ret = sigframe_alloc(user, &user->end_offset,
+ sizeof(struct _aarch64_ctx));
+ if (ret)
+ return ret;
+
+ /* Prevent further allocation: */
+ user->limit = user->size;
+ return 0;
+}
+
+static void __user *apply_user_offset(
+ struct rt_sigframe_user_layout const *user, unsigned long offset)
+{
+ char __user *base = (char __user *)user->sigframe;
+
+ return base + offset;
+}
+
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
@@ -92,12 +219,159 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
return err ? -EFAULT : 0;
}
+struct user_ctxs {
+ struct fpsimd_context __user *fpsimd;
+};
+
+static int parse_user_sigframe(struct user_ctxs *user,
+ struct rt_sigframe __user *sf)
+{
+ struct sigcontext __user *const sc = &sf->uc.uc_mcontext;
+ struct _aarch64_ctx __user *head;
+ char __user *base = (char __user *)&sc->__reserved;
+ size_t offset = 0;
+ size_t limit = sizeof(sc->__reserved);
+ bool have_extra_context = false;
+ char const __user *const sfp = (char const __user *)sf;
+
+ user->fpsimd = NULL;
+
+ if (!IS_ALIGNED((unsigned long)base, 16))
+ goto invalid;
+
+ while (1) {
+ int err = 0;
+ u32 magic, size;
+ char const __user *userp;
+ struct extra_context const __user *extra;
+ u64 extra_datap;
+ u32 extra_size;
+ struct _aarch64_ctx const __user *end;
+ u32 end_magic, end_size;
+
+ if (limit - offset < sizeof(*head))
+ goto invalid;
+
+ if (!IS_ALIGNED(offset, 16))
+ goto invalid;
+
+ head = (struct _aarch64_ctx __user *)(base + offset);
+ __get_user_error(magic, &head->magic, err);
+ __get_user_error(size, &head->size, err);
+ if (err)
+ return err;
+
+ if (limit - offset < size)
+ goto invalid;
+
+ switch (magic) {
+ case 0:
+ if (size)
+ goto invalid;
+
+ goto done;
+
+ case FPSIMD_MAGIC:
+ if (user->fpsimd)
+ goto invalid;
+
+ if (size < sizeof(*user->fpsimd))
+ goto invalid;
+
+ user->fpsimd = (struct fpsimd_context __user *)head;
+ break;
+
+ case ESR_MAGIC:
+ /* ignore */
+ break;
+
+ case EXTRA_MAGIC:
+ if (have_extra_context)
+ goto invalid;
+
+ if (size < sizeof(*extra))
+ goto invalid;
+
+ userp = (char const __user *)head;
+
+ extra = (struct extra_context const __user *)userp;
+ userp += size;
+
+ __get_user_error(extra_datap, &extra->datap, err);
+ __get_user_error(extra_size, &extra->size, err);
+ if (err)
+ return err;
+
+ /* Check for the dummy terminator in __reserved[]: */
+
+ if (limit - offset - size < TERMINATOR_SIZE)
+ goto invalid;
+
+ end = (struct _aarch64_ctx const __user *)userp;
+ userp += TERMINATOR_SIZE;
+
+ __get_user_error(end_magic, &end->magic, err);
+ __get_user_error(end_size, &end->size, err);
+ if (err)
+ return err;
+
+ if (end_magic || end_size)
+ goto invalid;
+
+ /* Prevent looping/repeated parsing of extra_context */
+ have_extra_context = true;
+
+ base = (__force void __user *)extra_datap;
+ if (!IS_ALIGNED((unsigned long)base, 16))
+ goto invalid;
+
+ if (!IS_ALIGNED(extra_size, 16))
+ goto invalid;
+
+ if (base != userp)
+ goto invalid;
+
+ /* Reject "unreasonably large" frames: */
+ if (extra_size > sfp + SIGFRAME_MAXSZ - userp)
+ goto invalid;
+
+ /*
+ * Ignore trailing terminator in __reserved[]
+ * and start parsing extra data:
+ */
+ offset = 0;
+ limit = extra_size;
+ continue;
+
+ default:
+ goto invalid;
+ }
+
+ if (size < sizeof(*head))
+ goto invalid;
+
+ if (limit - offset < size)
+ goto invalid;
+
+ offset += size;
+ }
+
+done:
+ if (!user->fpsimd)
+ goto invalid;
+
+ return 0;
+
+invalid:
+ return -EINVAL;
+}
+
static int restore_sigframe(struct pt_regs *regs,
struct rt_sigframe __user *sf)
{
sigset_t set;
int i, err;
- void *aux = sf->uc.uc_mcontext.__reserved;
+ struct user_ctxs user;
err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
if (err == 0)
@@ -116,12 +390,11 @@ static int restore_sigframe(struct pt_regs *regs,
regs->syscallno = ~0UL;
err |= !valid_user_regs(&regs->user_regs, current);
+ if (err == 0)
+ err = parse_user_sigframe(&user, sf);
- if (err == 0) {
- struct fpsimd_context *fpsimd_ctx =
- container_of(aux, struct fpsimd_context, head);
- err |= restore_fpsimd_context(fpsimd_ctx);
- }
+ if (err == 0)
+ err = restore_fpsimd_context(user.fpsimd);
return err;
}
@@ -162,16 +435,37 @@ badframe:
return 0;
}
-static int setup_sigframe(struct rt_sigframe __user *sf,
+/* Determine the layout of optional records in the signal frame */
+static int setup_sigframe_layout(struct rt_sigframe_user_layout *user)
+{
+ int err;
+
+ err = sigframe_alloc(user, &user->fpsimd_offset,
+ sizeof(struct fpsimd_context));
+ if (err)
+ return err;
+
+ /* fault information, if valid */
+ if (current->thread.fault_code) {
+ err = sigframe_alloc(user, &user->esr_offset,
+ sizeof(struct esr_context));
+ if (err)
+ return err;
+ }
+
+ return sigframe_alloc_end(user);
+}
+
+
+static int setup_sigframe(struct rt_sigframe_user_layout *user,
struct pt_regs *regs, sigset_t *set)
{
int i, err = 0;
- void *aux = sf->uc.uc_mcontext.__reserved;
- struct _aarch64_ctx *end;
+ struct rt_sigframe __user *sf = user->sigframe;
/* set up the stack frame for unwinding */
- __put_user_error(regs->regs[29], &sf->fp, err);
- __put_user_error(regs->regs[30], &sf->lr, err);
+ __put_user_error(regs->regs[29], &user->next_frame->fp, err);
+ __put_user_error(regs->regs[30], &user->next_frame->lr, err);
for (i = 0; i < 31; i++)
__put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i],
@@ -185,58 +479,103 @@ static int setup_sigframe(struct rt_sigframe __user *sf,
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set));
if (err == 0) {
- struct fpsimd_context *fpsimd_ctx =
- container_of(aux, struct fpsimd_context, head);
+ struct fpsimd_context __user *fpsimd_ctx =
+ apply_user_offset(user, user->fpsimd_offset);
err |= preserve_fpsimd_context(fpsimd_ctx);
- aux += sizeof(*fpsimd_ctx);
}
/* fault information, if valid */
- if (current->thread.fault_code) {
- struct esr_context *esr_ctx =
- container_of(aux, struct esr_context, head);
+ if (err == 0 && user->esr_offset) {
+ struct esr_context __user *esr_ctx =
+ apply_user_offset(user, user->esr_offset);
+
__put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err);
__put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err);
__put_user_error(current->thread.fault_code, &esr_ctx->esr, err);
- aux += sizeof(*esr_ctx);
+ }
+
+ if (err == 0 && user->extra_offset) {
+ char __user *sfp = (char __user *)user->sigframe;
+ char __user *userp =
+ apply_user_offset(user, user->extra_offset);
+
+ struct extra_context __user *extra;
+ struct _aarch64_ctx __user *end;
+ u64 extra_datap;
+ u32 extra_size;
+
+ extra = (struct extra_context __user *)userp;
+ userp += EXTRA_CONTEXT_SIZE;
+
+ end = (struct _aarch64_ctx __user *)userp;
+ userp += TERMINATOR_SIZE;
+
+ /*
+ * extra_datap is just written to the signal frame.
+ * The value gets cast back to a void __user *
+ * during sigreturn.
+ */
+ extra_datap = (__force u64)userp;
+ extra_size = sfp + round_up(user->size, 16) - userp;
+
+ __put_user_error(EXTRA_MAGIC, &extra->head.magic, err);
+ __put_user_error(EXTRA_CONTEXT_SIZE, &extra->head.size, err);
+ __put_user_error(extra_datap, &extra->datap, err);
+ __put_user_error(extra_size, &extra->size, err);
+
+ /* Add the terminator */
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
}
/* set the "end" magic */
- end = aux;
- __put_user_error(0, &end->magic, err);
- __put_user_error(0, &end->size, err);
+ if (err == 0) {
+ struct _aarch64_ctx __user *end =
+ apply_user_offset(user, user->end_offset);
+
+ __put_user_error(0, &end->magic, err);
+ __put_user_error(0, &end->size, err);
+ }
return err;
}
-static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig,
- struct pt_regs *regs)
+static int get_sigframe(struct rt_sigframe_user_layout *user,
+ struct ksignal *ksig, struct pt_regs *regs)
{
unsigned long sp, sp_top;
- struct rt_sigframe __user *frame;
+ int err;
+
+ init_user_layout(user);
+ err = setup_sigframe_layout(user);
+ if (err)
+ return err;
sp = sp_top = sigsp(regs->sp, ksig);
- sp = (sp - sizeof(struct rt_sigframe)) & ~15;
- frame = (struct rt_sigframe __user *)sp;
+ sp = round_down(sp - sizeof(struct frame_record), 16);
+ user->next_frame = (struct frame_record __user *)sp;
+
+ sp = round_down(sp, 16) - sigframe_size(user);
+ user->sigframe = (struct rt_sigframe __user *)sp;
/*
* Check that we can actually write to the signal frame.
*/
- if (!access_ok(VERIFY_WRITE, frame, sp_top - sp))
- frame = NULL;
+ if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp))
+ return -EFAULT;
- return frame;
+ return 0;
}
static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
- void __user *frame, int usig)
+ struct rt_sigframe_user_layout *user, int usig)
{
__sigrestore_t sigtramp;
regs->regs[0] = usig;
- regs->sp = (unsigned long)frame;
- regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp);
+ regs->sp = (unsigned long)user->sigframe;
+ regs->regs[29] = (unsigned long)&user->next_frame->fp;
regs->pc = (unsigned long)ka->sa.sa_handler;
if (ka->sa.sa_flags & SA_RESTORER)
@@ -250,20 +589,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
+ struct rt_sigframe_user_layout user;
struct rt_sigframe __user *frame;
int err = 0;
- frame = get_sigframe(ksig, regs);
- if (!frame)
+ if (get_sigframe(&user, ksig, regs))
return 1;
+ frame = user.sigframe;
+
__put_user_error(0, &frame->uc.uc_flags, err);
__put_user_error(NULL, &frame->uc.uc_link, err);
err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
- err |= setup_sigframe(frame, regs, set);
+ err |= setup_sigframe(&user, regs, set);
if (err == 0) {
- setup_return(regs, &ksig->ka, frame, usig);
+ setup_return(regs, &ksig->ka, &user, usig);
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
regs->regs[1] = (unsigned long)&frame->info;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index feac80c22f61..09d37d66b630 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -210,6 +210,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
put_task_stack(tsk);
}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void save_stack_trace(struct stack_trace *trace)
{
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 0805b44f986a..c7c7088097be 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -140,7 +140,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
}
}
-static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
struct stackframe frame;
unsigned long irq_stack_ptr;
@@ -344,22 +344,24 @@ static int call_undef_hook(struct pt_regs *regs)
if (compat_thumb_mode(regs)) {
/* 16-bit Thumb instruction */
- if (get_user(instr, (u16 __user *)pc))
+ __le16 instr_le;
+ if (get_user(instr_le, (__le16 __user *)pc))
goto exit;
- instr = le16_to_cpu(instr);
+ instr = le16_to_cpu(instr_le);
if (aarch32_insn_is_wide(instr)) {
u32 instr2;
- if (get_user(instr2, (u16 __user *)(pc + 2)))
+ if (get_user(instr_le, (__le16 __user *)(pc + 2)))
goto exit;
- instr2 = le16_to_cpu(instr2);
+ instr2 = le16_to_cpu(instr_le);
instr = (instr << 16) | instr2;
}
} else {
/* 32-bit ARM instruction */
- if (get_user(instr, (u32 __user *)pc))
+ __le32 instr_le;
+ if (get_user(instr_le, (__le32 __user *)pc))
goto exit;
- instr = le32_to_cpu(instr);
+ instr = le32_to_cpu(instr_le);
}
raw_spin_lock_irqsave(&undef_lock, flags);
@@ -728,8 +730,6 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr)
break;
case BUG_TRAP_TYPE_WARN:
- /* Ideally, report_bug() should backtrace for us... but no. */
- dump_backtrace(regs, NULL);
break;
default:
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 7492d9009610..e8f759f764f2 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -37,7 +37,7 @@
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
-extern char vdso_start, vdso_end;
+extern char vdso_start[], vdso_end[];
static unsigned long vdso_pages __ro_after_init;
/*
@@ -125,14 +125,14 @@ static int __init vdso_init(void)
struct page **vdso_pagelist;
unsigned long pfn;
- if (memcmp(&vdso_start, "\177ELF", 4)) {
+ if (memcmp(vdso_start, "\177ELF", 4)) {
pr_err("vDSO is not a valid ELF object!\n");
return -EINVAL;
}
- vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
+ vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
- vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data);
+ vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
/* Allocate the vDSO pagelist, plus a page for the data. */
vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
@@ -145,7 +145,7 @@ static int __init vdso_init(void)
/* Grab the vDSO code pages. */
- pfn = sym_to_pfn(&vdso_start);
+ pfn = sym_to_pfn(vdso_start);
for (i = 0; i < vdso_pages; i++)
vdso_pagelist[i + 1] = pfn_to_page(pfn + i);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3216e098c058..3e340b625436 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -95,11 +95,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
unsigned long attrs)
{
- if (dev == NULL) {
- WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
- return NULL;
- }
-
if (IS_ENABLED(CONFIG_ZONE_DMA) &&
dev->coherent_dma_mask <= DMA_BIT_MASK(32))
flags |= GFP_DMA;
@@ -128,10 +123,6 @@ static void __dma_free_coherent(struct device *dev, size_t size,
bool freed;
phys_addr_t paddr = dma_to_phys(dev, dma_handle);
- if (dev == NULL) {
- WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
- return;
- }
freed = dma_release_from_contiguous(dev,
phys_to_page(paddr),
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 37b95dff0b07..c7861c9864e6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -31,6 +31,7 @@
#include <linux/highmem.h>
#include <linux/perf_event.h>
#include <linux/preempt.h>
+#include <linux/hugetlb.h>
#include <asm/bug.h>
#include <asm/cpufeature.h>
@@ -42,6 +43,8 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <acpi/ghes.h>
+
struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
@@ -80,18 +83,35 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
#endif
/*
- * Dump out the page tables associated with 'addr' in mm 'mm'.
+ * Dump out the page tables associated with 'addr' in the currently active mm.
*/
-void show_pte(struct mm_struct *mm, unsigned long addr)
+void show_pte(unsigned long addr)
{
+ struct mm_struct *mm;
pgd_t *pgd;
- if (!mm)
+ if (addr < TASK_SIZE) {
+ /* TTBR0 */
+ mm = current->active_mm;
+ if (mm == &init_mm) {
+ pr_alert("[%016lx] user address but active_mm is swapper\n",
+ addr);
+ return;
+ }
+ } else if (addr >= VA_START) {
+ /* TTBR1 */
mm = &init_mm;
+ } else {
+ pr_alert("[%016lx] address between user and kernel address ranges\n",
+ addr);
+ return;
+ }
- pr_alert("pgd = %p\n", mm->pgd);
+ pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+ mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
+ VA_BITS, mm->pgd);
pgd = pgd_offset(mm, addr);
- pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd));
+ pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
do {
pud_t *pud;
@@ -196,8 +216,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs,
/*
* The kernel tried to access some page that wasn't present.
*/
-static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
- unsigned int esr, struct pt_regs *regs)
+static void __do_kernel_fault(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs)
{
const char *msg;
@@ -227,7 +247,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg,
addr);
- show_pte(mm, addr);
+ show_pte(addr);
die("Oops", regs, esr);
bust_spinlocks(0);
do_exit(SIGKILL);
@@ -239,18 +259,20 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr,
*/
static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
unsigned int esr, unsigned int sig, int code,
- struct pt_regs *regs)
+ struct pt_regs *regs, int fault)
{
struct siginfo si;
const struct fault_info *inf;
+ unsigned int lsb = 0;
if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
inf = esr_to_fault_info(esr);
- pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
+ pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x",
tsk->comm, task_pid_nr(tsk), inf->name, sig,
addr, esr);
- show_pte(tsk->mm, addr);
- show_regs(regs);
+ print_vma_addr(KERN_CONT ", in ", regs->pc);
+ pr_cont("\n");
+ __show_regs(regs);
}
tsk->thread.fault_address = addr;
@@ -259,13 +281,23 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
si.si_errno = 0;
si.si_code = code;
si.si_addr = (void __user *)addr;
+ /*
+ * Either small page or large page may be poisoned.
+ * In other words, VM_FAULT_HWPOISON_LARGE and
+ * VM_FAULT_HWPOISON are mutually exclusive.
+ */
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ else if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ si.si_addr_lsb = lsb;
+
force_sig_info(sig, &si, tsk);
}
static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->active_mm;
const struct fault_info *inf;
/*
@@ -274,9 +306,9 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
*/
if (user_mode(regs)) {
inf = esr_to_fault_info(esr);
- __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs);
+ __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0);
} else
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
}
#define VM_FAULT_BADMAP 0x010000
@@ -329,7 +361,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
{
struct task_struct *tsk;
struct mm_struct *mm;
- int fault, sig, code;
+ int fault, sig, code, major = 0;
unsigned long vm_flags = VM_READ | VM_WRITE;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
@@ -368,6 +400,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
die("Accessing user space memory outside uaccess.h routines", regs, esr);
}
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
/*
* As per x86, we may deadlock here. However, since the kernel only
* validly references user space from well defined areas of the code,
@@ -391,24 +425,42 @@ retry:
}
fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk);
+ major |= fault & VM_FAULT_MAJOR;
- /*
- * If we need to retry but a fatal signal is pending, handle the
- * signal first. We do not need to release the mmap_sem because it
- * would already be released in __lock_page_or_retry in mm/filemap.c.
- */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
- return 0;
+ if (fault & VM_FAULT_RETRY) {
+ /*
+ * If we need to retry but a fatal signal is pending,
+ * handle the signal first. We do not need to release
+ * the mmap_sem because it would already be released
+ * in __lock_page_or_retry in mm/filemap.c.
+ */
+ if (fatal_signal_pending(current))
+ return 0;
+
+ /*
+ * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
+ * starvation.
+ */
+ if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
+ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ mm_flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ }
+ }
+ up_read(&mm->mmap_sem);
/*
- * Major/minor page fault accounting is only done on the initial
- * attempt. If we go through a retry, it is extremely likely that the
- * page will be found in page cache at that point.
+ * Handle the "normal" (no error) case first.
*/
-
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
- if (mm_flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
+ if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
+ VM_FAULT_BADACCESS)))) {
+ /*
+ * Major/minor page fault accounting is only done
+ * once. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at
+ * that point.
+ */
+ if (major) {
tsk->maj_flt++;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
addr);
@@ -417,25 +469,9 @@ retry:
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
addr);
}
- if (fault & VM_FAULT_RETRY) {
- /*
- * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of
- * starvation.
- */
- mm_flags &= ~FAULT_FLAG_ALLOW_RETRY;
- mm_flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
- }
-
- up_read(&mm->mmap_sem);
- /*
- * Handle the "normal" case first - VM_FAULT_MAJOR
- */
- if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
- VM_FAULT_BADACCESS))))
return 0;
+ }
/*
* If we are in kernel mode at this point, we have no context to
@@ -461,6 +497,9 @@ retry:
*/
sig = SIGBUS;
code = BUS_ADRERR;
+ } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
+ sig = SIGBUS;
+ code = BUS_MCEERR_AR;
} else {
/*
* Something tried to access memory that isn't in our memory
@@ -471,11 +510,11 @@ retry:
SEGV_ACCERR : SEGV_MAPERR;
}
- __do_user_fault(tsk, addr, esr, sig, code, regs);
+ __do_user_fault(tsk, addr, esr, sig, code, regs, fault);
return 0;
no_context:
- __do_kernel_fault(mm, addr, esr, regs);
+ __do_kernel_fault(addr, esr, regs);
return 0;
}
@@ -522,6 +561,47 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
return 1;
}
+/*
+ * This abort handler deals with Synchronous External Abort.
+ * It calls notifiers, and then returns "fault".
+ */
+static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+ struct siginfo info;
+ const struct fault_info *inf;
+ int ret = 0;
+
+ inf = esr_to_fault_info(esr);
+ pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
+ inf->name, esr, addr);
+
+ /*
+ * Synchronous aborts may interrupt code which had interrupts masked.
+ * Before calling out into the wider kernel tell the interested
+ * subsystems.
+ */
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
+ if (interrupts_enabled(regs))
+ nmi_enter();
+
+ ret = ghes_notify_sea();
+
+ if (interrupts_enabled(regs))
+ nmi_exit();
+ }
+
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = 0;
+ if (esr & ESR_ELx_FnV)
+ info.si_addr = NULL;
+ else
+ info.si_addr = (void __user *)addr;
+ arm64_notify_die("", regs, &info, esr);
+
+ return ret;
+}
+
static const struct fault_info fault_info[] = {
{ do_bad, SIGBUS, 0, "ttbr address size fault" },
{ do_bad, SIGBUS, 0, "level 1 address size fault" },
@@ -539,22 +619,22 @@ static const struct fault_info fault_info[] = {
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" },
- { do_bad, SIGBUS, 0, "synchronous external abort" },
+ { do_sea, SIGBUS, 0, "synchronous external abort" },
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error" },
+ { do_sea, SIGBUS, 0, "level 0 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 (translation table walk)" },
+ { do_sea, SIGBUS, 0, "synchronous parity or ECC error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
{ do_bad, SIGBUS, 0, "unknown 27" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" },
+ { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" },
{ do_bad, SIGBUS, 0, "unknown 32" },
{ do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" },
{ do_bad, SIGBUS, 0, "unknown 34" },
@@ -590,6 +670,23 @@ static const struct fault_info fault_info[] = {
};
/*
+ * Handle Synchronous External Aborts that occur in a guest kernel.
+ *
+ * The return value will be zero if the SEA was successfully handled
+ * and non-zero if there was an error processing the error or there was
+ * no error to process.
+ */
+int handle_guest_sea(phys_addr_t addr, unsigned int esr)
+{
+ int ret = -ENOENT;
+
+ if (IS_ENABLED(CONFIG_ACPI_APEI_SEA))
+ ret = ghes_notify_sea();
+
+ return ret;
+}
+
+/*
* Dispatch a data abort to the relevant handler.
*/
asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 7514a000e361..69b8200b1cfd 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
- pmd_t *pmd = NULL;
- pte_t *pte = NULL;
+ pmd_t *pmd;
pgd = pgd_offset(mm, addr);
pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
if (!pgd_present(*pgd))
return NULL;
+
pud = pud_offset(pgd, addr);
- if (!pud_present(*pud))
+ if (pud_none(*pud))
return NULL;
-
- if (pud_huge(*pud))
+ /* swap or huge page */
+ if (!pud_present(*pud) || pud_huge(*pud))
return (pte_t *)pud;
+ /* table; check the next level */
+
pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
+ if (pmd_none(*pmd))
return NULL;
-
- if (pte_cont(pmd_pte(*pmd))) {
- pmd = pmd_offset(
- pud, (addr & CONT_PMD_MASK));
- return (pte_t *)pmd;
- }
- if (pmd_huge(*pmd))
+ if (!pmd_present(*pmd) || pmd_huge(*pmd))
return (pte_t *)pmd;
- pte = pte_offset_kernel(pmd, addr);
- if (pte_present(*pte) && pte_cont(*pte)) {
- pte = pte_offset_kernel(
- pmd, (addr & CONT_PTE_MASK));
- return pte;
- }
+
return NULL;
}
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 7b0d55756eb1..adc208c2ae9c 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -18,6 +18,7 @@
#include <linux/elf.h>
#include <linux/fs.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/export.h>
@@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
*/
int valid_phys_addr_range(phys_addr_t addr, size_t size)
{
- if (addr < PHYS_OFFSET)
- return 0;
- if (addr + size > __pa(high_memory - 1) + 1)
- return 0;
-
- return 1;
+ /*
+ * Check whether addr is covered by a memory region without the
+ * MEMBLOCK_NOMAP attribute, and whether that region covers the
+ * entire range. In theory, this could lead to false negatives
+ * if the range is covered by distinct but adjacent memory regions
+ * that only differ in other attributes. However, few of such
+ * attributes have been defined, and it is debatable whether it
+ * follows that /dev/mem read() calls should be able traverse
+ * such boundaries.
+ */
+ return memblock_is_region_memory(addr, size) &&
+ memblock_is_map_memory(addr);
}
/*
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0c429ec6fde8..23c2d89a362e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -31,6 +31,7 @@
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
+#include <linux/vmalloc.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 2f0505b5c240..f32144b2e07f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -70,7 +70,7 @@ struct jit_ctx {
int idx;
int epilogue_offset;
int *offset;
- u32 *image;
+ __le32 *image;
u32 stack_size;
};
@@ -131,7 +131,7 @@ static inline int bpf2a64_offset(int bpf_to, int bpf_from,
static void jit_fill_hole(void *area, unsigned int size)
{
- u32 *ptr;
+ __le32 *ptr;
/* We are guaranteed to have aligned memory. */
for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
@@ -874,7 +874,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 2. Now, the actual pass. */
- ctx.image = (u32 *)image_ptr;
+ ctx.image = (__le32 *)image_ptr;
ctx.idx = 0;
build_prologue(&ctx);
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index b0140c8fc733..de14d49a5c90 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -39,6 +39,21 @@ config ACPI_APEI_PCIEAER
PCIe AER errors may be reported via APEI firmware first mode.
Turn on this option to enable the corresponding support.
+config ACPI_APEI_SEA
+ bool "APEI Synchronous External Abort logging/recovering support"
+ depends on ARM64 && ACPI_APEI_GHES
+ default y
+ help
+ This option should be enabled if the system supports
+ firmware first handling of SEA (Synchronous External Abort).
+ SEA happens with certain faults of data abort or instruction
+ abort synchronous exceptions on ARMv8 systems. If a system
+ supports firmware first handling of SEA, the platform analyzes
+ and handles hardware error notifications from SEA, and it may then
+ form a HW error record for the OS to parse and handle. This
+ option allows the OS to look for such hardware error record, and
+ take appropriate action.
+
config ACPI_APEI_MEMORY_FAILURE
bool "APEI memory error recovering support"
depends on ACPI_APEI && MEMORY_FAILURE
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0968816f5755..d661d452b238 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -45,10 +45,14 @@
#include <linux/aer.h>
#include <linux/nmi.h>
#include <linux/sched/clock.h>
+#include <linux/uuid.h>
+#include <linux/ras.h>
+#include <acpi/actbl1.h>
#include <acpi/ghes.h>
#include <acpi/apei.h>
#include <asm/tlbflush.h>
+#include <ras/ras_event.h>
#include "apei-internal.h"
@@ -80,6 +84,11 @@
((struct acpi_hest_generic_status *) \
((struct ghes_estatus_node *)(estatus_node) + 1))
+static inline bool is_hest_type_generic_v2(struct ghes *ghes)
+{
+ return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
+}
+
/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
@@ -110,11 +119,7 @@ static DEFINE_MUTEX(ghes_list_mutex);
* Two virtual pages are used, one for IRQ/PROCESS context, the other for
* NMI context (optionally).
*/
-#ifdef CONFIG_HAVE_ACPI_APEI_NMI
#define GHES_IOREMAP_PAGES 2
-#else
-#define GHES_IOREMAP_PAGES 1
-#endif
#define GHES_IOREMAP_IRQ_PAGE(base) (base)
#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE)
@@ -133,6 +138,8 @@ static unsigned long ghes_estatus_pool_size_request;
static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
static atomic_t ghes_estatus_cache_alloced;
+static int ghes_panic_timeout __read_mostly = 30;
+
static int ghes_ioremap_init(void)
{
ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -153,10 +160,14 @@ static void ghes_ioremap_exit(void)
static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
{
unsigned long vaddr;
+ phys_addr_t paddr;
+ pgprot_t prot;
vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
- ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
- pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+ paddr = pfn << PAGE_SHIFT;
+ prot = arch_apei_get_mem_attribute(paddr);
+ ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
return (void __iomem *)vaddr;
}
@@ -240,6 +251,16 @@ static int ghes_estatus_pool_expand(unsigned long len)
return 0;
}
+static int map_gen_v2(struct ghes *ghes)
+{
+ return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
+static void unmap_gen_v2(struct ghes *ghes)
+{
+ apei_unmap_generic_address(&ghes->generic_v2->read_ack_register);
+}
+
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
@@ -249,10 +270,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
if (!ghes)
return ERR_PTR(-ENOMEM);
+
ghes->generic = generic;
+ if (is_hest_type_generic_v2(ghes)) {
+ rc = map_gen_v2(ghes);
+ if (rc)
+ goto err_free;
+ }
+
rc = apei_map_generic_address(&generic->error_status_address);
if (rc)
- goto err_free;
+ goto err_unmap_read_ack_addr;
error_block_length = generic->error_block_length;
if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
pr_warning(FW_WARN GHES_PFX
@@ -264,13 +292,16 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
if (!ghes->estatus) {
rc = -ENOMEM;
- goto err_unmap;
+ goto err_unmap_status_addr;
}
return ghes;
-err_unmap:
+err_unmap_status_addr:
apei_unmap_generic_address(&generic->error_status_address);
+err_unmap_read_ack_addr:
+ if (is_hest_type_generic_v2(ghes))
+ unmap_gen_v2(ghes);
err_free:
kfree(ghes);
return ERR_PTR(rc);
@@ -280,6 +311,8 @@ static void ghes_fini(struct ghes *ghes)
{
kfree(ghes->estatus);
apei_unmap_generic_address(&ghes->generic->error_status_address);
+ if (is_hest_type_generic_v2(ghes))
+ unmap_gen_v2(ghes);
}
static inline int ghes_severity(int severity)
@@ -400,8 +433,7 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
unsigned long pfn;
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
- struct cper_sec_mem_err *mem_err;
- mem_err = (struct cper_sec_mem_err *)(gdata + 1);
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
@@ -432,14 +464,22 @@ static void ghes_do_proc(struct ghes *ghes,
int sev, sec_sev;
struct acpi_hest_generic_data *gdata;
guid_t *sec_type;
+ guid_t *fru_id = &NULL_UUID_LE;
+ char *fru_text = "";
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
sec_type = (guid_t *)gdata->section_type;
sec_sev = ghes_severity(gdata->error_severity);
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+ fru_id = (guid_t *)gdata->fru_id;
+
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+ fru_text = gdata->fru_text;
+
if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
- struct cper_sec_mem_err *mem_err;
- mem_err = (struct cper_sec_mem_err *)(gdata+1);
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
ghes_edac_report_mem_error(ghes, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
@@ -447,8 +487,8 @@ static void ghes_do_proc(struct ghes *ghes,
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
- struct cper_sec_pcie *pcie_err;
- pcie_err = (struct cper_sec_pcie *)(gdata+1);
+ struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
@@ -477,6 +517,17 @@ static void ghes_do_proc(struct ghes *ghes,
}
#endif
+ else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
+ struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+
+ log_arm_hw_error(err);
+ } else {
+ void *err = acpi_hest_get_payload(gdata);
+
+ log_non_standard_event(sec_type, fru_id, fru_text,
+ sec_sev, err,
+ gdata->error_data_length);
+ }
}
}
@@ -649,6 +700,31 @@ static void ghes_estatus_cache_add(
rcu_read_unlock();
}
+static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
+{
+ int rc;
+ u64 val = 0;
+
+ rc = apei_read(&val, &gv2->read_ack_register);
+ if (rc)
+ return rc;
+
+ val &= gv2->read_ack_preserve << gv2->read_ack_register.bit_offset;
+ val |= gv2->read_ack_write << gv2->read_ack_register.bit_offset;
+
+ return apei_write(val, &gv2->read_ack_register);
+}
+
+static void __ghes_panic(struct ghes *ghes)
+{
+ __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
+
+ /* reboot to log the error! */
+ if (!panic_timeout)
+ panic_timeout = ghes_panic_timeout;
+ panic("Fatal hardware error!");
+}
+
static int ghes_proc(struct ghes *ghes)
{
int rc;
@@ -656,11 +732,26 @@ static int ghes_proc(struct ghes *ghes)
rc = ghes_read_estatus(ghes, 0);
if (rc)
goto out;
+
+ if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
+ __ghes_panic(ghes);
+ }
+
if (!ghes_estatus_cached(ghes->estatus)) {
if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
ghes_estatus_cache_add(ghes->generic, ghes->estatus);
}
ghes_do_proc(ghes, ghes->estatus);
+
+ /*
+ * GHESv2 type HEST entries introduce support for error acknowledgment,
+ * so only acknowledge the error if this support is present.
+ */
+ if (is_hest_type_generic_v2(ghes)) {
+ rc = ghes_ack_error(ghes->generic_v2);
+ if (rc)
+ return rc;
+ }
out:
ghes_clear_estatus(ghes);
return rc;
@@ -722,6 +813,55 @@ static struct notifier_block ghes_notifier_hed = {
.notifier_call = ghes_notify_hed,
};
+#ifdef CONFIG_ACPI_APEI_SEA
+static LIST_HEAD(ghes_sea);
+
+/*
+ * Return 0 only if one of the SEA error sources successfully reported an error
+ * record sent from the firmware.
+ */
+int ghes_notify_sea(void)
+{
+ struct ghes *ghes;
+ int ret = -ENOENT;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ghes, &ghes_sea, list) {
+ if (!ghes_proc(ghes))
+ ret = 0;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static void ghes_sea_add(struct ghes *ghes)
+{
+ mutex_lock(&ghes_list_mutex);
+ list_add_rcu(&ghes->list, &ghes_sea);
+ mutex_unlock(&ghes_list_mutex);
+}
+
+static void ghes_sea_remove(struct ghes *ghes)
+{
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ mutex_unlock(&ghes_list_mutex);
+ synchronize_rcu();
+}
+#else /* CONFIG_ACPI_APEI_SEA */
+static inline void ghes_sea_add(struct ghes *ghes)
+{
+ pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n",
+ ghes->generic->header.source_id);
+}
+
+static inline void ghes_sea_remove(struct ghes *ghes)
+{
+ pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n",
+ ghes->generic->header.source_id);
+}
+#endif /* CONFIG_ACPI_APEI_SEA */
+
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
/*
* printk is not safe in NMI context. So in NMI handler, we allocate
@@ -742,8 +882,6 @@ static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
static LIST_HEAD(ghes_nmi);
-static int ghes_panic_timeout __read_mostly = 30;
-
static void ghes_proc_in_irq(struct irq_work *irq_work)
{
struct llist_node *llnode, *next;
@@ -829,18 +967,6 @@ static void __process_error(struct ghes *ghes)
#endif
}
-static void __ghes_panic(struct ghes *ghes)
-{
- oops_begin();
- ghes_print_queued_estatus();
- __ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
-
- /* reboot to log the error! */
- if (panic_timeout == 0)
- panic_timeout = ghes_panic_timeout;
- panic("Fatal hardware error!");
-}
-
static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
{
struct ghes *ghes;
@@ -858,8 +984,11 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
}
sev = ghes_severity(ghes->estatus->error_severity);
- if (sev >= GHES_SEV_PANIC)
+ if (sev >= GHES_SEV_PANIC) {
+ oops_begin();
+ ghes_print_queued_estatus();
__ghes_panic(ghes);
+ }
if (!(ghes->flags & GHES_TO_CLEAR))
continue;
@@ -970,6 +1099,14 @@ static int ghes_probe(struct platform_device *ghes_dev)
case ACPI_HEST_NOTIFY_GPIO:
break;
+ case ACPI_HEST_NOTIFY_SEA:
+ if (!IS_ENABLED(CONFIG_ACPI_APEI_SEA)) {
+ pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n",
+ generic->header.source_id);
+ rc = -ENOTSUPP;
+ goto err;
+ }
+ break;
case ACPI_HEST_NOTIFY_NMI:
if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
@@ -1038,6 +1175,9 @@ static int ghes_probe(struct platform_device *ghes_dev)
mutex_unlock(&ghes_list_mutex);
break;
+ case ACPI_HEST_NOTIFY_SEA:
+ ghes_sea_add(ghes);
+ break;
case ACPI_HEST_NOTIFY_NMI:
ghes_nmi_add(ghes);
break;
@@ -1046,6 +1186,9 @@ static int ghes_probe(struct platform_device *ghes_dev)
}
platform_set_drvdata(ghes_dev, ghes);
+ /* Handle any pending errors right away */
+ ghes_proc(ghes);
+
return 0;
err_edac_unreg:
ghes_edac_unregister(ghes);
@@ -1085,6 +1228,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
synchronize_rcu();
break;
+ case ACPI_HEST_NOTIFY_SEA:
+ ghes_sea_remove(ghes);
+ break;
case ACPI_HEST_NOTIFY_NMI:
ghes_nmi_remove(ghes);
break;
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 8f2a98e23bba..456b488eb1df 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
[ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
[ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
[ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+ [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2),
};
static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
@@ -141,7 +142,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void
{
int *count = data;
- if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR)
+ if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
+ hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2)
(*count)++;
return 0;
}
@@ -152,7 +154,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data)
struct ghes_arr *ghes_arr = data;
int rc, i;
- if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
+ if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
+ hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
return 0;
if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 797b28dc7b34..d048f72c23f8 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -234,21 +234,6 @@ static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type,
return NULL;
}
-static acpi_status
-iort_match_type_callback(struct acpi_iort_node *node, void *context)
-{
- return AE_OK;
-}
-
-bool iort_node_match(u8 type)
-{
- struct acpi_iort_node *node;
-
- node = iort_scan_node(type, iort_match_type_callback, NULL);
-
- return node != NULL;
-}
-
static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
void *context)
{
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 2af70014ee5a..ccd239ab879f 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -17,6 +17,8 @@ config DEVMEM
config DEVKMEM
bool "/dev/kmem virtual device support"
+ # On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write
+ depends on !ARM64
help
Say Y here if you want to support the /dev/kmem device. The
/dev/kmem device is rarely used, but can be used for certain
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index d42537425438..48a8f69da42a 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -32,6 +32,10 @@
#include <linux/acpi.h>
#include <linux/pci.h>
#include <linux/aer.h>
+#include <linux/printk.h>
+#include <linux/bcd.h>
+#include <acpi/ghes.h>
+#include <ras/ras_event.h>
#define INDENT_SP " "
@@ -107,12 +111,15 @@ void cper_print_bits(const char *pfx, unsigned int bits,
static const char * const proc_type_strs[] = {
"IA32/X64",
"IA64",
+ "ARM",
};
static const char * const proc_isa_strs[] = {
"IA32",
"IA64",
"X64",
+ "ARM A32/T32",
+ "ARM A64",
};
static const char * const proc_error_type_strs[] = {
@@ -181,6 +188,122 @@ static void cper_print_proc_generic(const char *pfx,
printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
}
+#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+static const char * const arm_reg_ctx_strs[] = {
+ "AArch32 general purpose registers",
+ "AArch32 EL1 context registers",
+ "AArch32 EL2 context registers",
+ "AArch32 secure context registers",
+ "AArch64 general purpose registers",
+ "AArch64 EL1 context registers",
+ "AArch64 EL2 context registers",
+ "AArch64 EL3 context registers",
+ "Misc. system register structure",
+};
+
+static void cper_print_proc_arm(const char *pfx,
+ const struct cper_sec_proc_arm *proc)
+{
+ int i, len, max_ctx_type;
+ struct cper_arm_err_info *err_info;
+ struct cper_arm_ctx_info *ctx_info;
+ char newpfx[64];
+
+ printk("%sMIDR: 0x%016llx\n", pfx, proc->midr);
+
+ len = proc->section_length - (sizeof(*proc) +
+ proc->err_info_num * (sizeof(*err_info)));
+ if (len < 0) {
+ printk("%ssection length: %d\n", pfx, proc->section_length);
+ printk("%ssection length is too small\n", pfx);
+ printk("%sfirmware-generated error record is incorrect\n", pfx);
+ printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num);
+ return;
+ }
+
+ if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
+ printk("%sMultiprocessor Affinity Register (MPIDR): 0x%016llx\n",
+ pfx, proc->mpidr);
+
+ if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
+ printk("%serror affinity level: %d\n", pfx,
+ proc->affinity_level);
+
+ if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
+ printk("%srunning state: 0x%x\n", pfx, proc->running_state);
+ printk("%sPower State Coordination Interface state: %d\n",
+ pfx, proc->psci_state);
+ }
+
+ snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+
+ err_info = (struct cper_arm_err_info *)(proc + 1);
+ for (i = 0; i < proc->err_info_num; i++) {
+ printk("%sError info structure %d:\n", pfx, i);
+
+ printk("%snum errors: %d\n", pfx, err_info->multiple_error + 1);
+
+ if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) {
+ if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST)
+ printk("%sfirst error captured\n", newpfx);
+ if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST)
+ printk("%slast error captured\n", newpfx);
+ if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED)
+ printk("%spropagated error captured\n",
+ newpfx);
+ if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW)
+ printk("%soverflow occurred, error info is incomplete\n",
+ newpfx);
+ }
+
+ printk("%serror_type: %d, %s\n", newpfx, err_info->type,
+ err_info->type < ARRAY_SIZE(proc_error_type_strs) ?
+ proc_error_type_strs[err_info->type] : "unknown");
+ if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO)
+ printk("%serror_info: 0x%016llx\n", newpfx,
+ err_info->error_info);
+ if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR)
+ printk("%svirtual fault address: 0x%016llx\n",
+ newpfx, err_info->virt_fault_addr);
+ if (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR)
+ printk("%sphysical fault address: 0x%016llx\n",
+ newpfx, err_info->physical_fault_addr);
+ err_info += 1;
+ }
+
+ ctx_info = (struct cper_arm_ctx_info *)err_info;
+ max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1;
+ for (i = 0; i < proc->context_info_num; i++) {
+ int size = sizeof(*ctx_info) + ctx_info->size;
+
+ printk("%sContext info structure %d:\n", pfx, i);
+ if (len < size) {
+ printk("%ssection length is too small\n", newpfx);
+ printk("%sfirmware-generated error record is incorrect\n", pfx);
+ return;
+ }
+ if (ctx_info->type > max_ctx_type) {
+ printk("%sInvalid context type: %d (max: %d)\n",
+ newpfx, ctx_info->type, max_ctx_type);
+ return;
+ }
+ printk("%sregister context type: %s\n", newpfx,
+ arm_reg_ctx_strs[ctx_info->type]);
+ print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4,
+ (ctx_info + 1), ctx_info->size, 0);
+ len -= size;
+ ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size);
+ }
+
+ if (len > 0) {
+ printk("%sVendor specific error info has %u bytes:\n", pfx,
+ len);
+ print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info,
+ len, true);
+ }
+}
+#endif
+
static const char * const mem_err_type_strs[] = {
"unknown",
"no error",
@@ -386,13 +509,38 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
}
-static void cper_estatus_print_section(
- const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
+static void cper_print_tstamp(const char *pfx,
+ struct acpi_hest_generic_data_v300 *gdata)
+{
+ __u8 hour, min, sec, day, mon, year, century, *timestamp;
+
+ if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
+ timestamp = (__u8 *)&(gdata->time_stamp);
+ sec = bcd2bin(timestamp[0]);
+ min = bcd2bin(timestamp[1]);
+ hour = bcd2bin(timestamp[2]);
+ day = bcd2bin(timestamp[4]);
+ mon = bcd2bin(timestamp[5]);
+ year = bcd2bin(timestamp[6]);
+ century = bcd2bin(timestamp[7]);
+
+ printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
+ (timestamp[3] & 0x1 ? "precise " : "imprecise "),
+ century, year, mon, day, hour, min, sec);
+ }
+}
+
+static void
+cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
+ int sec_no)
{
uuid_le *sec_type = (uuid_le *)gdata->section_type;
__u16 severity;
char newpfx[64];
+ if (acpi_hest_get_version(gdata) >= 3)
+ cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
+
severity = gdata->error_severity;
printk("%s""Error %d, type: %s\n", pfx, sec_no,
cper_severity_str(severity));
@@ -403,14 +551,16 @@ static void cper_estatus_print_section(
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
- struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
+ struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
+
printk("%s""section_type: general processor error\n", newpfx);
if (gdata->error_data_length >= sizeof(*proc_err))
cper_print_proc_generic(newpfx, proc_err);
else
goto err_section_too_small;
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
- struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
+ struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
printk("%s""section_type: memory error\n", newpfx);
if (gdata->error_data_length >=
sizeof(struct cper_sec_mem_err_old))
@@ -419,14 +569,32 @@ static void cper_estatus_print_section(
else
goto err_section_too_small;
} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
- struct cper_sec_pcie *pcie = (void *)(gdata + 1);
+ struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
+
printk("%s""section_type: PCIe error\n", newpfx);
if (gdata->error_data_length >= sizeof(*pcie))
cper_print_pcie(newpfx, pcie, gdata);
else
goto err_section_too_small;
- } else
- printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
+#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+ } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) {
+ struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
+
+ printk("%ssection_type: ARM processor error\n", newpfx);
+ if (gdata->error_data_length >= sizeof(*arm_err))
+ cper_print_proc_arm(newpfx, arm_err);
+ else
+ goto err_section_too_small;
+#endif
+ } else {
+ const void *err = acpi_hest_get_payload(gdata);
+
+ printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
+ printk("%ssection length: %#x\n", newpfx,
+ gdata->error_data_length);
+ print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
+ gdata->error_data_length, true);
+ }
return;
@@ -438,7 +606,7 @@ void cper_estatus_print(const char *pfx,
const struct acpi_hest_generic_status *estatus)
{
struct acpi_hest_generic_data *gdata;
- unsigned int data_len, gedata_len;
+ unsigned int data_len;
int sec_no = 0;
char newpfx[64];
__u16 severity;
@@ -452,11 +620,11 @@ void cper_estatus_print(const char *pfx,
data_len = estatus->data_length;
gdata = (struct acpi_hest_generic_data *)(estatus + 1);
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
- while (data_len >= sizeof(*gdata)) {
- gedata_len = gdata->error_data_length;
+
+ while (data_len >= acpi_hest_get_size(gdata)) {
cper_estatus_print_section(newpfx, gdata, sec_no);
- data_len -= gedata_len + sizeof(*gdata);
- gdata = (void *)(gdata + 1) + gedata_len;
+ data_len -= acpi_hest_get_record_size(gdata);
+ gdata = acpi_hest_get_next(gdata);
sec_no++;
}
}
@@ -486,12 +654,14 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
return rc;
data_len = estatus->data_length;
gdata = (struct acpi_hest_generic_data *)(estatus + 1);
- while (data_len >= sizeof(*gdata)) {
- gedata_len = gdata->error_data_length;
- if (gedata_len > data_len - sizeof(*gdata))
+
+ while (data_len >= acpi_hest_get_size(gdata)) {
+ gedata_len = acpi_hest_get_error_length(gdata);
+ if (gedata_len > data_len - acpi_hest_get_size(gdata))
return -EINVAL;
- data_len -= gedata_len + sizeof(*gdata);
- gdata = (void *)(gdata + 1) + gedata_len;
+
+ data_len -= acpi_hest_get_record_size(gdata);
+ gdata = acpi_hest_get_next(gdata);
}
if (data_len)
return -EINVAL;
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 676232a94f9f..f1fd5f44d1d4 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -39,7 +39,6 @@ config ARM_GIC_V3_ITS
bool
depends on PCI
depends on PCI_MSI
- select ACPI_IORT if ACPI
config ARM_NVIC
bool
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index aa587edaf9ea..e5197ffb7422 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -3,9 +3,10 @@
#
menu "Performance monitor support"
+ depends on PERF_EVENTS
config ARM_PMU
- depends on PERF_EVENTS && (ARM || ARM64)
+ depends on ARM || ARM64
bool "ARM PMU framework"
default y
help
@@ -18,7 +19,7 @@ config ARM_PMU_ACPI
config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
- depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+ depends on ARCH_QCOM && ARM64 && ACPI
help
Provides support for the L2 cache performance monitor unit (PMU)
in Qualcomm Technologies processors.
@@ -27,7 +28,7 @@ config QCOM_L2_PMU
config QCOM_L3_PMU
bool "Qualcomm Technologies L3-cache PMU"
- depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI
+ depends on ARCH_QCOM && ARM64 && ACPI
select QCOM_IRQ_COMBINER
help
Provides support for the L3 cache performance monitor unit (PMU)
@@ -36,7 +37,7 @@ config QCOM_L3_PMU
monitoring L3 cache events.
config XGENE_PMU
- depends on PERF_EVENTS && ARCH_XGENE
+ depends on ARCH_XGENE
bool "APM X-Gene SoC PMU"
default n
help
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 35b5289bc5da..e841282d690c 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -37,6 +37,8 @@
#define CSW_CSWCR 0x0000
#define CSW_CSWCR_DUALMCB_MASK BIT(0)
+#define CSW_CSWCR_MCB0_ROUTING(x) (((x) & 0x0C) >> 2)
+#define CSW_CSWCR_MCB1_ROUTING(x) (((x) & 0x30) >> 4)
#define MCBADDRMR 0x0000
#define MCBADDRMR_DUALMCU_MODE_MASK BIT(2)
@@ -50,8 +52,17 @@
#define PCPPMU_INT_L3C BIT(2)
#define PCPPMU_INT_IOB BIT(3)
+#define PCPPMU_V3_INTMASK 0x00FF33FF
+#define PCPPMU_V3_INTENMASK 0xFFFFFFFF
+#define PCPPMU_V3_INTCLRMASK 0xFF00CC00
+#define PCPPMU_V3_INT_MCU 0x000000FF
+#define PCPPMU_V3_INT_MCB 0x00000300
+#define PCPPMU_V3_INT_L3C 0x00FF0000
+#define PCPPMU_V3_INT_IOB 0x00003000
+
#define PMU_MAX_COUNTERS 4
-#define PMU_CNT_MAX_PERIOD 0x100000000ULL
+#define PMU_CNT_MAX_PERIOD 0xFFFFFFFFULL
+#define PMU_V3_CNT_MAX_PERIOD 0xFFFFFFFFFFFFFFFFULL
#define PMU_OVERFLOW_MASK 0xF
#define PMU_PMCR_E BIT(0)
#define PMU_PMCR_P BIT(1)
@@ -73,6 +84,10 @@
#define PMU_PMOVSR 0xC80
#define PMU_PMCR 0xE04
+/* PMU registers for V3 */
+#define PMU_PMOVSCLR 0xC80
+#define PMU_PMOVSSET 0xCC0
+
#define to_pmu_dev(p) container_of(p, struct xgene_pmu_dev, pmu)
#define GET_CNTR(ev) (ev->hw.idx)
#define GET_EVENTID(ev) (ev->hw.config & 0xFFULL)
@@ -96,14 +111,33 @@ struct xgene_pmu_dev {
struct perf_event *pmu_counter_event[PMU_MAX_COUNTERS];
};
+struct xgene_pmu_ops {
+ void (*mask_int)(struct xgene_pmu *pmu);
+ void (*unmask_int)(struct xgene_pmu *pmu);
+ u64 (*read_counter)(struct xgene_pmu_dev *pmu, int idx);
+ void (*write_counter)(struct xgene_pmu_dev *pmu, int idx, u64 val);
+ void (*write_evttype)(struct xgene_pmu_dev *pmu_dev, int idx, u32 val);
+ void (*write_agentmsk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+ void (*write_agent1msk)(struct xgene_pmu_dev *pmu_dev, u32 val);
+ void (*enable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*disable_counter)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*enable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*disable_counter_int)(struct xgene_pmu_dev *pmu_dev, int idx);
+ void (*reset_counters)(struct xgene_pmu_dev *pmu_dev);
+ void (*start_counters)(struct xgene_pmu_dev *pmu_dev);
+ void (*stop_counters)(struct xgene_pmu_dev *pmu_dev);
+};
+
struct xgene_pmu {
struct device *dev;
int version;
void __iomem *pcppmu_csr;
u32 mcb_active_mask;
u32 mc_active_mask;
+ u32 l3c_active_mask;
cpumask_t cpu;
raw_spinlock_t lock;
+ const struct xgene_pmu_ops *ops;
struct list_head l3cpmus;
struct list_head iobpmus;
struct list_head mcbpmus;
@@ -125,11 +159,13 @@ struct xgene_pmu_data {
enum xgene_pmu_version {
PCP_PMU_V1 = 1,
PCP_PMU_V2,
+ PCP_PMU_V3,
};
enum xgene_pmu_dev_type {
PMU_TYPE_L3C = 0,
PMU_TYPE_IOB,
+ PMU_TYPE_IOB_SLOW,
PMU_TYPE_MCB,
PMU_TYPE_MC,
};
@@ -195,6 +231,56 @@ static const struct attribute_group mc_pmu_format_attr_group = {
.attrs = mc_pmu_format_attrs,
};
+static struct attribute *l3c_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(l3c_eventid, "config:0-39"),
+ NULL,
+};
+
+static struct attribute *iob_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(iob_eventid, "config:0-47"),
+ NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(iob_slow_eventid, "config:0-16"),
+ NULL,
+};
+
+static struct attribute *mcb_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(mcb_eventid, "config:0-35"),
+ NULL,
+};
+
+static struct attribute *mc_pmu_v3_format_attrs[] = {
+ XGENE_PMU_FORMAT_ATTR(mc_eventid, "config:0-44"),
+ NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = l3c_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = iob_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = iob_slow_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = mcb_pmu_v3_format_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_format_attr_group = {
+ .name = "format",
+ .attrs = mc_pmu_v3_format_attrs,
+};
+
/*
* sysfs event attributes
*/
@@ -311,6 +397,219 @@ static const struct attribute_group mc_pmu_events_attr_group = {
.attrs = mc_pmu_events_attrs,
};
+static struct attribute *l3c_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(read-hit, 0x01),
+ XGENE_PMU_EVENT_ATTR(read-miss, 0x02),
+ XGENE_PMU_EVENT_ATTR(index-flush-eviction, 0x03),
+ XGENE_PMU_EVENT_ATTR(write-caused-replacement, 0x04),
+ XGENE_PMU_EVENT_ATTR(write-not-caused-replacement, 0x05),
+ XGENE_PMU_EVENT_ATTR(clean-eviction, 0x06),
+ XGENE_PMU_EVENT_ATTR(dirty-eviction, 0x07),
+ XGENE_PMU_EVENT_ATTR(read, 0x08),
+ XGENE_PMU_EVENT_ATTR(write, 0x09),
+ XGENE_PMU_EVENT_ATTR(request, 0x0a),
+ XGENE_PMU_EVENT_ATTR(tq-bank-conflict-issue-stall, 0x0b),
+ XGENE_PMU_EVENT_ATTR(tq-full, 0x0c),
+ XGENE_PMU_EVENT_ATTR(ackq-full, 0x0d),
+ XGENE_PMU_EVENT_ATTR(wdb-full, 0x0e),
+ XGENE_PMU_EVENT_ATTR(odb-full, 0x10),
+ XGENE_PMU_EVENT_ATTR(wbq-full, 0x11),
+ XGENE_PMU_EVENT_ATTR(input-req-async-fifo-stall, 0x12),
+ XGENE_PMU_EVENT_ATTR(output-req-async-fifo-stall, 0x13),
+ XGENE_PMU_EVENT_ATTR(output-data-async-fifo-stall, 0x14),
+ XGENE_PMU_EVENT_ATTR(total-insertion, 0x15),
+ XGENE_PMU_EVENT_ATTR(sip-insertions-r-set, 0x16),
+ XGENE_PMU_EVENT_ATTR(sip-insertions-r-clear, 0x17),
+ XGENE_PMU_EVENT_ATTR(dip-insertions-r-set, 0x18),
+ XGENE_PMU_EVENT_ATTR(dip-insertions-r-clear, 0x19),
+ XGENE_PMU_EVENT_ATTR(dip-insertions-force-r-set, 0x1a),
+ XGENE_PMU_EVENT_ATTR(egression, 0x1b),
+ XGENE_PMU_EVENT_ATTR(replacement, 0x1c),
+ XGENE_PMU_EVENT_ATTR(old-replacement, 0x1d),
+ XGENE_PMU_EVENT_ATTR(young-replacement, 0x1e),
+ XGENE_PMU_EVENT_ATTR(r-set-replacement, 0x1f),
+ XGENE_PMU_EVENT_ATTR(r-clear-replacement, 0x20),
+ XGENE_PMU_EVENT_ATTR(old-r-replacement, 0x21),
+ XGENE_PMU_EVENT_ATTR(old-nr-replacement, 0x22),
+ XGENE_PMU_EVENT_ATTR(young-r-replacement, 0x23),
+ XGENE_PMU_EVENT_ATTR(young-nr-replacement, 0x24),
+ XGENE_PMU_EVENT_ATTR(bloomfilter-clearing, 0x25),
+ XGENE_PMU_EVENT_ATTR(generation-flip, 0x26),
+ XGENE_PMU_EVENT_ATTR(vcc-droop-detected, 0x27),
+ NULL,
+};
+
+static struct attribute *iob_fast_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-all, 0x01),
+ XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-rd, 0x02),
+ XGENE_PMU_EVENT_ATTR(pa-req-buf-alloc-wr, 0x03),
+ XGENE_PMU_EVENT_ATTR(pa-all-cp-req, 0x04),
+ XGENE_PMU_EVENT_ATTR(pa-cp-blk-req, 0x05),
+ XGENE_PMU_EVENT_ATTR(pa-cp-ptl-req, 0x06),
+ XGENE_PMU_EVENT_ATTR(pa-cp-rd-req, 0x07),
+ XGENE_PMU_EVENT_ATTR(pa-cp-wr-req, 0x08),
+ XGENE_PMU_EVENT_ATTR(ba-all-req, 0x09),
+ XGENE_PMU_EVENT_ATTR(ba-rd-req, 0x0a),
+ XGENE_PMU_EVENT_ATTR(ba-wr-req, 0x0b),
+ XGENE_PMU_EVENT_ATTR(pa-rd-shared-req-issued, 0x10),
+ XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-req-issued, 0x11),
+ XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-stashable, 0x12),
+ XGENE_PMU_EVENT_ATTR(pa-wr-invalidate-req-issued-nonstashable, 0x13),
+ XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-stashable, 0x14),
+ XGENE_PMU_EVENT_ATTR(pa-wr-back-req-issued-nonstashable, 0x15),
+ XGENE_PMU_EVENT_ATTR(pa-ptl-wr-req, 0x16),
+ XGENE_PMU_EVENT_ATTR(pa-ptl-rd-req, 0x17),
+ XGENE_PMU_EVENT_ATTR(pa-wr-back-clean-data, 0x18),
+ XGENE_PMU_EVENT_ATTR(pa-wr-back-cancelled-on-SS, 0x1b),
+ XGENE_PMU_EVENT_ATTR(pa-barrier-occurrence, 0x1c),
+ XGENE_PMU_EVENT_ATTR(pa-barrier-cycles, 0x1d),
+ XGENE_PMU_EVENT_ATTR(pa-total-cp-snoops, 0x20),
+ XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop, 0x21),
+ XGENE_PMU_EVENT_ATTR(pa-rd-shared-snoop-hit, 0x22),
+ XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop, 0x23),
+ XGENE_PMU_EVENT_ATTR(pa-rd-exclusive-snoop-hit, 0x24),
+ XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop, 0x25),
+ XGENE_PMU_EVENT_ATTR(pa-rd-wr-invalid-snoop-hit, 0x26),
+ XGENE_PMU_EVENT_ATTR(pa-req-buffer-full, 0x28),
+ XGENE_PMU_EVENT_ATTR(cswlf-outbound-req-fifo-full, 0x29),
+ XGENE_PMU_EVENT_ATTR(cswlf-inbound-snoop-fifo-backpressure, 0x2a),
+ XGENE_PMU_EVENT_ATTR(cswlf-outbound-lack-fifo-full, 0x2b),
+ XGENE_PMU_EVENT_ATTR(cswlf-inbound-gack-fifo-backpressure, 0x2c),
+ XGENE_PMU_EVENT_ATTR(cswlf-outbound-data-fifo-full, 0x2d),
+ XGENE_PMU_EVENT_ATTR(cswlf-inbound-data-fifo-backpressure, 0x2e),
+ XGENE_PMU_EVENT_ATTR(cswlf-inbound-req-backpressure, 0x2f),
+ NULL,
+};
+
+static struct attribute *iob_slow_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(pa-axi0-rd-req, 0x01),
+ XGENE_PMU_EVENT_ATTR(pa-axi0-wr-req, 0x02),
+ XGENE_PMU_EVENT_ATTR(pa-axi1-rd-req, 0x03),
+ XGENE_PMU_EVENT_ATTR(pa-axi1-wr-req, 0x04),
+ XGENE_PMU_EVENT_ATTR(ba-all-axi-req, 0x07),
+ XGENE_PMU_EVENT_ATTR(ba-axi-rd-req, 0x08),
+ XGENE_PMU_EVENT_ATTR(ba-axi-wr-req, 0x09),
+ XGENE_PMU_EVENT_ATTR(ba-free-list-empty, 0x10),
+ NULL,
+};
+
+static struct attribute *mcb_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(req-receive, 0x01),
+ XGENE_PMU_EVENT_ATTR(rd-req-recv, 0x02),
+ XGENE_PMU_EVENT_ATTR(rd-req-recv-2, 0x03),
+ XGENE_PMU_EVENT_ATTR(wr-req-recv, 0x04),
+ XGENE_PMU_EVENT_ATTR(wr-req-recv-2, 0x05),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu, 0x06),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-mcu-2, 0x07),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu, 0x08),
+ XGENE_PMU_EVENT_ATTR(rd-req-sent-to-spec-mcu-2, 0x09),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-recv-for-rd-sent-to-spec-mcu, 0x0a),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-for-rd-sent-to-spec-mcu, 0x0b),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-nogo-recv-for-rd-sent-to-spec-mcu, 0x0c),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req, 0x0d),
+ XGENE_PMU_EVENT_ATTR(glbl-ack-go-recv-any-rd-req-2, 0x0e),
+ XGENE_PMU_EVENT_ATTR(wr-req-sent-to-mcu, 0x0f),
+ XGENE_PMU_EVENT_ATTR(gack-recv, 0x10),
+ XGENE_PMU_EVENT_ATTR(rd-gack-recv, 0x11),
+ XGENE_PMU_EVENT_ATTR(wr-gack-recv, 0x12),
+ XGENE_PMU_EVENT_ATTR(cancel-rd-gack, 0x13),
+ XGENE_PMU_EVENT_ATTR(cancel-wr-gack, 0x14),
+ XGENE_PMU_EVENT_ATTR(mcb-csw-req-stall, 0x15),
+ XGENE_PMU_EVENT_ATTR(mcu-req-intf-blocked, 0x16),
+ XGENE_PMU_EVENT_ATTR(mcb-mcu-rd-intf-stall, 0x17),
+ XGENE_PMU_EVENT_ATTR(csw-rd-intf-blocked, 0x18),
+ XGENE_PMU_EVENT_ATTR(csw-local-ack-intf-blocked, 0x19),
+ XGENE_PMU_EVENT_ATTR(mcu-req-table-full, 0x1a),
+ XGENE_PMU_EVENT_ATTR(mcu-stat-table-full, 0x1b),
+ XGENE_PMU_EVENT_ATTR(mcu-wr-table-full, 0x1c),
+ XGENE_PMU_EVENT_ATTR(mcu-rdreceipt-resp, 0x1d),
+ XGENE_PMU_EVENT_ATTR(mcu-wrcomplete-resp, 0x1e),
+ XGENE_PMU_EVENT_ATTR(mcu-retryack-resp, 0x1f),
+ XGENE_PMU_EVENT_ATTR(mcu-pcrdgrant-resp, 0x20),
+ XGENE_PMU_EVENT_ATTR(mcu-req-from-lastload, 0x21),
+ XGENE_PMU_EVENT_ATTR(mcu-req-from-bypass, 0x22),
+ XGENE_PMU_EVENT_ATTR(volt-droop-detect, 0x23),
+ NULL,
+};
+
+static struct attribute *mc_pmu_v3_events_attrs[] = {
+ XGENE_PMU_EVENT_ATTR(cycle-count, 0x00),
+ XGENE_PMU_EVENT_ATTR(act-sent, 0x01),
+ XGENE_PMU_EVENT_ATTR(pre-sent, 0x02),
+ XGENE_PMU_EVENT_ATTR(rd-sent, 0x03),
+ XGENE_PMU_EVENT_ATTR(rda-sent, 0x04),
+ XGENE_PMU_EVENT_ATTR(wr-sent, 0x05),
+ XGENE_PMU_EVENT_ATTR(wra-sent, 0x06),
+ XGENE_PMU_EVENT_ATTR(pd-entry-vld, 0x07),
+ XGENE_PMU_EVENT_ATTR(sref-entry-vld, 0x08),
+ XGENE_PMU_EVENT_ATTR(prea-sent, 0x09),
+ XGENE_PMU_EVENT_ATTR(ref-sent, 0x0a),
+ XGENE_PMU_EVENT_ATTR(rd-rda-sent, 0x0b),
+ XGENE_PMU_EVENT_ATTR(wr-wra-sent, 0x0c),
+ XGENE_PMU_EVENT_ATTR(raw-hazard, 0x0d),
+ XGENE_PMU_EVENT_ATTR(war-hazard, 0x0e),
+ XGENE_PMU_EVENT_ATTR(waw-hazard, 0x0f),
+ XGENE_PMU_EVENT_ATTR(rar-hazard, 0x10),
+ XGENE_PMU_EVENT_ATTR(raw-war-waw-hazard, 0x11),
+ XGENE_PMU_EVENT_ATTR(hprd-lprd-wr-req-vld, 0x12),
+ XGENE_PMU_EVENT_ATTR(lprd-req-vld, 0x13),
+ XGENE_PMU_EVENT_ATTR(hprd-req-vld, 0x14),
+ XGENE_PMU_EVENT_ATTR(hprd-lprd-req-vld, 0x15),
+ XGENE_PMU_EVENT_ATTR(wr-req-vld, 0x16),
+ XGENE_PMU_EVENT_ATTR(partial-wr-req-vld, 0x17),
+ XGENE_PMU_EVENT_ATTR(rd-retry, 0x18),
+ XGENE_PMU_EVENT_ATTR(wr-retry, 0x19),
+ XGENE_PMU_EVENT_ATTR(retry-gnt, 0x1a),
+ XGENE_PMU_EVENT_ATTR(rank-change, 0x1b),
+ XGENE_PMU_EVENT_ATTR(dir-change, 0x1c),
+ XGENE_PMU_EVENT_ATTR(rank-dir-change, 0x1d),
+ XGENE_PMU_EVENT_ATTR(rank-active, 0x1e),
+ XGENE_PMU_EVENT_ATTR(rank-idle, 0x1f),
+ XGENE_PMU_EVENT_ATTR(rank-pd, 0x20),
+ XGENE_PMU_EVENT_ATTR(rank-sref, 0x21),
+ XGENE_PMU_EVENT_ATTR(queue-fill-gt-thresh, 0x22),
+ XGENE_PMU_EVENT_ATTR(queue-rds-gt-thresh, 0x23),
+ XGENE_PMU_EVENT_ATTR(queue-wrs-gt-thresh, 0x24),
+ XGENE_PMU_EVENT_ATTR(phy-updt-complt, 0x25),
+ XGENE_PMU_EVENT_ATTR(tz-fail, 0x26),
+ XGENE_PMU_EVENT_ATTR(dram-errc, 0x27),
+ XGENE_PMU_EVENT_ATTR(dram-errd, 0x28),
+ XGENE_PMU_EVENT_ATTR(rd-enq, 0x29),
+ XGENE_PMU_EVENT_ATTR(wr-enq, 0x2a),
+ XGENE_PMU_EVENT_ATTR(tmac-limit-reached, 0x2b),
+ XGENE_PMU_EVENT_ATTR(tmaw-tracker-full, 0x2c),
+ NULL,
+};
+
+static const struct attribute_group l3c_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = l3c_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_fast_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = iob_fast_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group iob_slow_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = iob_slow_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mcb_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = mcb_pmu_v3_events_attrs,
+};
+
+static const struct attribute_group mc_pmu_v3_events_attr_group = {
+ .name = "events",
+ .attrs = mc_pmu_v3_events_attrs,
+};
+
/*
* sysfs cpumask attributes
*/
@@ -334,7 +633,7 @@ static const struct attribute_group pmu_cpumask_attr_group = {
};
/*
- * Per PMU device attribute groups
+ * Per PMU device attribute groups of PMU v1 and v2
*/
static const struct attribute_group *l3c_pmu_attr_groups[] = {
&l3c_pmu_format_attr_group,
@@ -364,6 +663,44 @@ static const struct attribute_group *mc_pmu_attr_groups[] = {
NULL
};
+/*
+ * Per PMU device attribute groups of PMU v3
+ */
+static const struct attribute_group *l3c_pmu_v3_attr_groups[] = {
+ &l3c_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &l3c_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *iob_fast_pmu_v3_attr_groups[] = {
+ &iob_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &iob_fast_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *iob_slow_pmu_v3_attr_groups[] = {
+ &iob_slow_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &iob_slow_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *mcb_pmu_v3_attr_groups[] = {
+ &mcb_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &mcb_pmu_v3_events_attr_group,
+ NULL
+};
+
+static const struct attribute_group *mc_pmu_v3_attr_groups[] = {
+ &mc_pmu_v3_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &mc_pmu_v3_events_attr_group,
+ NULL
+};
+
static int get_next_avail_cntr(struct xgene_pmu_dev *pmu_dev)
{
int cntr;
@@ -387,23 +724,67 @@ static inline void xgene_pmu_mask_int(struct xgene_pmu *xgene_pmu)
writel(PCPPMU_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}
+static inline void xgene_pmu_v3_mask_int(struct xgene_pmu *xgene_pmu)
+{
+ writel(PCPPMU_V3_INTENMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
static inline void xgene_pmu_unmask_int(struct xgene_pmu *xgene_pmu)
{
writel(PCPPMU_INTCLRMASK, xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
}
-static inline u32 xgene_pmu_read_counter(struct xgene_pmu_dev *pmu_dev, int idx)
+static inline void xgene_pmu_v3_unmask_int(struct xgene_pmu *xgene_pmu)
+{
+ writel(PCPPMU_V3_INTCLRMASK,
+ xgene_pmu->pcppmu_csr + PCPPMU_INTMASK_REG);
+}
+
+static inline u64 xgene_pmu_read_counter32(struct xgene_pmu_dev *pmu_dev,
+ int idx)
{
return readl(pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
}
+static inline u64 xgene_pmu_read_counter64(struct xgene_pmu_dev *pmu_dev,
+ int idx)
+{
+ u32 lo, hi;
+
+ /*
+ * v3 has 64-bit counter registers composed by 2 32-bit registers
+ * This can be a problem if the counter increases and carries
+ * out of bit [31] between 2 reads. The extra reads would help
+ * to prevent this issue.
+ */
+ do {
+ hi = xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1);
+ lo = xgene_pmu_read_counter32(pmu_dev, 2 * idx);
+ } while (hi != xgene_pmu_read_counter32(pmu_dev, 2 * idx + 1));
+
+ return (((u64)hi << 32) | lo);
+}
+
static inline void
-xgene_pmu_write_counter(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
+xgene_pmu_write_counter32(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMEVCNTR0 + (4 * idx));
}
static inline void
+xgene_pmu_write_counter64(struct xgene_pmu_dev *pmu_dev, int idx, u64 val)
+{
+ u32 cnt_lo, cnt_hi;
+
+ cnt_hi = upper_32_bits(val);
+ cnt_lo = lower_32_bits(val);
+
+ /* v3 has 64-bit counter registers composed by 2 32-bit registers */
+ xgene_pmu_write_counter32(pmu_dev, 2 * idx, cnt_lo);
+ xgene_pmu_write_counter32(pmu_dev, 2 * idx + 1, cnt_hi);
+}
+
+static inline void
xgene_pmu_write_evttype(struct xgene_pmu_dev *pmu_dev, int idx, u32 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMEVTYPER0 + (4 * idx));
@@ -416,12 +797,18 @@ xgene_pmu_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val)
}
static inline void
+xgene_pmu_v3_write_agentmsk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
xgene_pmu_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val)
{
writel(val, pmu_dev->inf->csr + PMU_PMAMR1);
}
static inline void
+xgene_pmu_v3_write_agent1msk(struct xgene_pmu_dev *pmu_dev, u32 val) { }
+
+static inline void
xgene_pmu_enable_counter(struct xgene_pmu_dev *pmu_dev, int idx)
{
u32 val;
@@ -491,20 +878,22 @@ static inline void xgene_pmu_stop_counters(struct xgene_pmu_dev *pmu_dev)
static void xgene_perf_pmu_enable(struct pmu *pmu)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
int enabled = bitmap_weight(pmu_dev->cntr_assign_mask,
pmu_dev->max_counters);
if (!enabled)
return;
- xgene_pmu_start_counters(pmu_dev);
+ xgene_pmu->ops->start_counters(pmu_dev);
}
static void xgene_perf_pmu_disable(struct pmu *pmu)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
- xgene_pmu_stop_counters(pmu_dev);
+ xgene_pmu->ops->stop_counters(pmu_dev);
}
static int xgene_perf_event_init(struct perf_event *event)
@@ -572,49 +961,56 @@ static int xgene_perf_event_init(struct perf_event *event)
static void xgene_perf_enable_event(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
- xgene_pmu_write_evttype(pmu_dev, GET_CNTR(event), GET_EVENTID(event));
- xgene_pmu_write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
+ xgene_pmu->ops->write_evttype(pmu_dev, GET_CNTR(event),
+ GET_EVENTID(event));
+ xgene_pmu->ops->write_agentmsk(pmu_dev, ~((u32)GET_AGENTID(event)));
if (pmu_dev->inf->type == PMU_TYPE_IOB)
- xgene_pmu_write_agent1msk(pmu_dev, ~((u32)GET_AGENT1ID(event)));
+ xgene_pmu->ops->write_agent1msk(pmu_dev,
+ ~((u32)GET_AGENT1ID(event)));
- xgene_pmu_enable_counter(pmu_dev, GET_CNTR(event));
- xgene_pmu_enable_counter_int(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->enable_counter(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->enable_counter_int(pmu_dev, GET_CNTR(event));
}
static void xgene_perf_disable_event(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
- xgene_pmu_disable_counter(pmu_dev, GET_CNTR(event));
- xgene_pmu_disable_counter_int(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->disable_counter(pmu_dev, GET_CNTR(event));
+ xgene_pmu->ops->disable_counter_int(pmu_dev, GET_CNTR(event));
}
static void xgene_perf_event_set_period(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
/*
- * The X-Gene PMU counters have a period of 2^32. To account for the
- * possiblity of extreme interrupt latency we program for a period of
- * half that. Hopefully we can handle the interrupt before another 2^31
+ * For 32 bit counter, it has a period of 2^32. To account for the
+ * possibility of extreme interrupt latency we program for a period of
+ * half that. Hopefully, we can handle the interrupt before another 2^31
* events occur and the counter overtakes its previous value.
+ * For 64 bit counter, we don't expect it overflow.
*/
u64 val = 1ULL << 31;
local64_set(&hw->prev_count, val);
- xgene_pmu_write_counter(pmu_dev, hw->idx, (u32) val);
+ xgene_pmu->ops->write_counter(pmu_dev, hw->idx, val);
}
static void xgene_perf_event_update(struct perf_event *event)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
u64 delta, prev_raw_count, new_raw_count;
again:
prev_raw_count = local64_read(&hw->prev_count);
- new_raw_count = xgene_pmu_read_counter(pmu_dev, GET_CNTR(event));
+ new_raw_count = xgene_pmu->ops->read_counter(pmu_dev, GET_CNTR(event));
if (local64_cmpxchg(&hw->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -633,6 +1029,7 @@ static void xgene_perf_read(struct perf_event *event)
static void xgene_perf_start(struct perf_event *event, int flags)
{
struct xgene_pmu_dev *pmu_dev = to_pmu_dev(event->pmu);
+ struct xgene_pmu *xgene_pmu = pmu_dev->parent;
struct hw_perf_event *hw = &event->hw;
if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
@@ -646,8 +1043,8 @@ static void xgene_perf_start(struct perf_event *event, int flags)
if (flags & PERF_EF_RELOAD) {
u64 prev_raw_count = local64_read(&hw->prev_count);
- xgene_pmu_write_counter(pmu_dev, GET_CNTR(event),
- (u32) prev_raw_count);
+ xgene_pmu->ops->write_counter(pmu_dev, GET_CNTR(event),
+ prev_raw_count);
}
xgene_perf_enable_event(event);
@@ -713,7 +1110,10 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
{
struct xgene_pmu *xgene_pmu;
- pmu_dev->max_period = PMU_CNT_MAX_PERIOD - 1;
+ if (pmu_dev->parent->version == PCP_PMU_V3)
+ pmu_dev->max_period = PMU_V3_CNT_MAX_PERIOD;
+ else
+ pmu_dev->max_period = PMU_CNT_MAX_PERIOD;
/* First version PMU supports only single event counter */
xgene_pmu = pmu_dev->parent;
if (xgene_pmu->version == PCP_PMU_V1)
@@ -736,8 +1136,8 @@ static int xgene_init_perf(struct xgene_pmu_dev *pmu_dev, char *name)
};
/* Hardware counter init */
- xgene_pmu_stop_counters(pmu_dev);
- xgene_pmu_reset_counters(pmu_dev);
+ xgene_pmu->ops->stop_counters(pmu_dev);
+ xgene_pmu->ops->reset_counters(pmu_dev);
return perf_pmu_register(&pmu_dev->pmu, name, -1);
}
@@ -758,20 +1158,38 @@ xgene_pmu_dev_add(struct xgene_pmu *xgene_pmu, struct xgene_pmu_dev_ctx *ctx)
switch (pmu->inf->type) {
case PMU_TYPE_L3C:
- pmu->attr_groups = l3c_pmu_attr_groups;
+ if (!(xgene_pmu->l3c_active_mask & pmu->inf->enable_mask))
+ goto dev_err;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = l3c_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = l3c_pmu_attr_groups;
break;
case PMU_TYPE_IOB:
- pmu->attr_groups = iob_pmu_attr_groups;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = iob_fast_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = iob_pmu_attr_groups;
+ break;
+ case PMU_TYPE_IOB_SLOW:
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = iob_slow_pmu_v3_attr_groups;
break;
case PMU_TYPE_MCB:
if (!(xgene_pmu->mcb_active_mask & pmu->inf->enable_mask))
goto dev_err;
- pmu->attr_groups = mcb_pmu_attr_groups;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = mcb_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = mcb_pmu_attr_groups;
break;
case PMU_TYPE_MC:
if (!(xgene_pmu->mc_active_mask & pmu->inf->enable_mask))
goto dev_err;
- pmu->attr_groups = mc_pmu_attr_groups;
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmu->attr_groups = mc_pmu_v3_attr_groups;
+ else
+ pmu->attr_groups = mc_pmu_attr_groups;
break;
default:
return -EINVAL;
@@ -795,18 +1213,27 @@ dev_err:
static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
{
struct xgene_pmu *xgene_pmu = pmu_dev->parent;
+ void __iomem *csr = pmu_dev->inf->csr;
u32 pmovsr;
int idx;
- pmovsr = readl(pmu_dev->inf->csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+ xgene_pmu->ops->stop_counters(pmu_dev);
+
+ if (xgene_pmu->version == PCP_PMU_V3)
+ pmovsr = readl(csr + PMU_PMOVSSET) & PMU_OVERFLOW_MASK;
+ else
+ pmovsr = readl(csr + PMU_PMOVSR) & PMU_OVERFLOW_MASK;
+
if (!pmovsr)
- return;
+ goto out;
/* Clear interrupt flag */
if (xgene_pmu->version == PCP_PMU_V1)
- writel(0x0, pmu_dev->inf->csr + PMU_PMOVSR);
+ writel(0x0, csr + PMU_PMOVSR);
+ else if (xgene_pmu->version == PCP_PMU_V2)
+ writel(pmovsr, csr + PMU_PMOVSR);
else
- writel(pmovsr, pmu_dev->inf->csr + PMU_PMOVSR);
+ writel(pmovsr, csr + PMU_PMOVSCLR);
for (idx = 0; idx < PMU_MAX_COUNTERS; idx++) {
struct perf_event *event = pmu_dev->pmu_counter_event[idx];
@@ -818,10 +1245,14 @@ static void _xgene_pmu_isr(int irq, struct xgene_pmu_dev *pmu_dev)
xgene_perf_event_update(event);
xgene_perf_event_set_period(event);
}
+
+out:
+ xgene_pmu->ops->start_counters(pmu_dev);
}
static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
{
+ u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
struct xgene_pmu_dev_ctx *ctx;
struct xgene_pmu *xgene_pmu = dev_id;
unsigned long flags;
@@ -831,22 +1262,33 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
/* Get Interrupt PMU source */
val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
- if (val & PCPPMU_INT_MCU) {
+ if (xgene_pmu->version == PCP_PMU_V3) {
+ intr_mcu = PCPPMU_V3_INT_MCU;
+ intr_mcb = PCPPMU_V3_INT_MCB;
+ intr_l3c = PCPPMU_V3_INT_L3C;
+ intr_iob = PCPPMU_V3_INT_IOB;
+ } else {
+ intr_mcu = PCPPMU_INT_MCU;
+ intr_mcb = PCPPMU_INT_MCB;
+ intr_l3c = PCPPMU_INT_L3C;
+ intr_iob = PCPPMU_INT_IOB;
+ }
+ if (val & intr_mcu) {
list_for_each_entry(ctx, &xgene_pmu->mcpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
- if (val & PCPPMU_INT_MCB) {
+ if (val & intr_mcb) {
list_for_each_entry(ctx, &xgene_pmu->mcbpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
- if (val & PCPPMU_INT_L3C) {
+ if (val & intr_l3c) {
list_for_each_entry(ctx, &xgene_pmu->l3cpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
}
- if (val & PCPPMU_INT_IOB) {
+ if (val & intr_iob) {
list_for_each_entry(ctx, &xgene_pmu->iobpmus, next) {
_xgene_pmu_isr(irq, ctx->pmu_dev);
}
@@ -857,8 +1299,8 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
- struct platform_device *pdev)
+static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+ struct platform_device *pdev)
{
void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
struct resource *res;
@@ -885,6 +1327,8 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return PTR_ERR(mcbb_csr);
}
+ xgene_pmu->l3c_active_mask = 0x1;
+
reg = readl(csw_csr + CSW_CSWCR);
if (reg & CSW_CSWCR_DUALMCB_MASK) {
/* Dual MCB active */
@@ -905,8 +1349,56 @@ static int acpi_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return 0;
}
-static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
- struct platform_device *pdev)
+static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+ struct platform_device *pdev)
+{
+ void __iomem *csw_csr;
+ struct resource *res;
+ unsigned int reg;
+ u32 mcb0routing;
+ u32 mcb1routing;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ csw_csr = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(csw_csr)) {
+ dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
+ return PTR_ERR(csw_csr);
+ }
+
+ reg = readl(csw_csr + CSW_CSWCR);
+ mcb0routing = CSW_CSWCR_MCB0_ROUTING(reg);
+ mcb1routing = CSW_CSWCR_MCB1_ROUTING(reg);
+ if (reg & CSW_CSWCR_DUALMCB_MASK) {
+ /* Dual MCB active */
+ xgene_pmu->mcb_active_mask = 0x3;
+ /* Probe all active L3C(s), maximum is 8 */
+ xgene_pmu->l3c_active_mask = 0xFF;
+ /* Probe all active MC(s), maximum is 8 */
+ if ((mcb0routing == 0x2) && (mcb1routing == 0x2))
+ xgene_pmu->mc_active_mask = 0xFF;
+ else if ((mcb0routing == 0x1) && (mcb1routing == 0x1))
+ xgene_pmu->mc_active_mask = 0x33;
+ else
+ xgene_pmu->mc_active_mask = 0x11;
+ } else {
+ /* Single MCB active */
+ xgene_pmu->mcb_active_mask = 0x1;
+ /* Probe all active L3C(s), maximum is 4 */
+ xgene_pmu->l3c_active_mask = 0x0F;
+ /* Probe all active MC(s), maximum is 4 */
+ if (mcb0routing == 0x2)
+ xgene_pmu->mc_active_mask = 0x0F;
+ else if (mcb0routing == 0x1)
+ xgene_pmu->mc_active_mask = 0x03;
+ else
+ xgene_pmu->mc_active_mask = 0x01;
+ }
+
+ return 0;
+}
+
+static int fdt_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+ struct platform_device *pdev)
{
struct regmap *csw_map, *mcba_map, *mcbb_map;
struct device_node *np = pdev->dev.of_node;
@@ -930,6 +1422,7 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return PTR_ERR(mcbb_map);
}
+ xgene_pmu->l3c_active_mask = 0x1;
if (regmap_read(csw_map, CSW_CSWCR, &reg))
return -EINVAL;
@@ -954,12 +1447,18 @@ static int fdt_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
return 0;
}
-static int xgene_pmu_probe_active_mcb_mcu(struct xgene_pmu *xgene_pmu,
- struct platform_device *pdev)
+static int xgene_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
+ struct platform_device *pdev)
{
- if (has_acpi_companion(&pdev->dev))
- return acpi_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
- return fdt_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
+ if (has_acpi_companion(&pdev->dev)) {
+ if (xgene_pmu->version == PCP_PMU_V3)
+ return acpi_pmu_v3_probe_active_mcb_mcu_l3c(xgene_pmu,
+ pdev);
+ else
+ return acpi_pmu_probe_active_mcb_mcu_l3c(xgene_pmu,
+ pdev);
+ }
+ return fdt_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
}
static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
@@ -969,6 +1468,8 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
return devm_kasprintf(dev, GFP_KERNEL, "l3c%d", id);
case PMU_TYPE_IOB:
return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id);
+ case PMU_TYPE_IOB_SLOW:
+ return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id);
case PMU_TYPE_MCB:
return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id);
case PMU_TYPE_MC:
@@ -1047,9 +1548,40 @@ err:
return NULL;
}
+static const struct acpi_device_id xgene_pmu_acpi_type_match[] = {
+ {"APMC0D5D", PMU_TYPE_L3C},
+ {"APMC0D5E", PMU_TYPE_IOB},
+ {"APMC0D5F", PMU_TYPE_MCB},
+ {"APMC0D60", PMU_TYPE_MC},
+ {"APMC0D84", PMU_TYPE_L3C},
+ {"APMC0D85", PMU_TYPE_IOB},
+ {"APMC0D86", PMU_TYPE_IOB_SLOW},
+ {"APMC0D87", PMU_TYPE_MCB},
+ {"APMC0D88", PMU_TYPE_MC},
+ {},
+};
+
+static const struct acpi_device_id *xgene_pmu_acpi_match_type(
+ const struct acpi_device_id *ids,
+ struct acpi_device *adev)
+{
+ const struct acpi_device_id *match_id = NULL;
+ const struct acpi_device_id *id;
+
+ for (id = ids; id->id[0] || id->cls; id++) {
+ if (!acpi_match_device_ids(adev, id))
+ match_id = id;
+ else if (match_id)
+ break;
+ }
+
+ return match_id;
+}
+
static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
void *data, void **return_value)
{
+ const struct acpi_device_id *acpi_id;
struct xgene_pmu *xgene_pmu = data;
struct xgene_pmu_dev_ctx *ctx;
struct acpi_device *adev;
@@ -1059,17 +1591,11 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
if (acpi_bus_get_status(adev) || !adev->status.present)
return AE_OK;
- if (!strcmp(acpi_device_hid(adev), "APMC0D5D"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_L3C);
- else if (!strcmp(acpi_device_hid(adev), "APMC0D5E"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_IOB);
- else if (!strcmp(acpi_device_hid(adev), "APMC0D5F"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MCB);
- else if (!strcmp(acpi_device_hid(adev), "APMC0D60"))
- ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, PMU_TYPE_MC);
- else
- ctx = NULL;
+ acpi_id = xgene_pmu_acpi_match_type(xgene_pmu_acpi_type_match, adev);
+ if (!acpi_id)
+ return AE_OK;
+ ctx = acpi_get_pmu_hw_inf(xgene_pmu, adev, (u32)acpi_id->driver_data);
if (!ctx)
return AE_OK;
@@ -1086,6 +1612,9 @@ static acpi_status acpi_pmu_dev_add(acpi_handle handle, u32 level,
case PMU_TYPE_IOB:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
+ case PMU_TYPE_IOB_SLOW:
+ list_add(&ctx->next, &xgene_pmu->iobpmus);
+ break;
case PMU_TYPE_MCB:
list_add(&ctx->next, &xgene_pmu->mcbpmus);
break;
@@ -1207,6 +1736,9 @@ static int fdt_pmu_probe_pmu_dev(struct xgene_pmu *xgene_pmu,
case PMU_TYPE_IOB:
list_add(&ctx->next, &xgene_pmu->iobpmus);
break;
+ case PMU_TYPE_IOB_SLOW:
+ list_add(&ctx->next, &xgene_pmu->iobpmus);
+ break;
case PMU_TYPE_MCB:
list_add(&ctx->next, &xgene_pmu->mcbpmus);
break;
@@ -1235,6 +1767,40 @@ static const struct xgene_pmu_data xgene_pmu_v2_data = {
.id = PCP_PMU_V2,
};
+static const struct xgene_pmu_ops xgene_pmu_ops = {
+ .mask_int = xgene_pmu_mask_int,
+ .unmask_int = xgene_pmu_unmask_int,
+ .read_counter = xgene_pmu_read_counter32,
+ .write_counter = xgene_pmu_write_counter32,
+ .write_evttype = xgene_pmu_write_evttype,
+ .write_agentmsk = xgene_pmu_write_agentmsk,
+ .write_agent1msk = xgene_pmu_write_agent1msk,
+ .enable_counter = xgene_pmu_enable_counter,
+ .disable_counter = xgene_pmu_disable_counter,
+ .enable_counter_int = xgene_pmu_enable_counter_int,
+ .disable_counter_int = xgene_pmu_disable_counter_int,
+ .reset_counters = xgene_pmu_reset_counters,
+ .start_counters = xgene_pmu_start_counters,
+ .stop_counters = xgene_pmu_stop_counters,
+};
+
+static const struct xgene_pmu_ops xgene_pmu_v3_ops = {
+ .mask_int = xgene_pmu_v3_mask_int,
+ .unmask_int = xgene_pmu_v3_unmask_int,
+ .read_counter = xgene_pmu_read_counter64,
+ .write_counter = xgene_pmu_write_counter64,
+ .write_evttype = xgene_pmu_write_evttype,
+ .write_agentmsk = xgene_pmu_v3_write_agentmsk,
+ .write_agent1msk = xgene_pmu_v3_write_agent1msk,
+ .enable_counter = xgene_pmu_enable_counter,
+ .disable_counter = xgene_pmu_disable_counter,
+ .enable_counter_int = xgene_pmu_enable_counter_int,
+ .disable_counter_int = xgene_pmu_disable_counter_int,
+ .reset_counters = xgene_pmu_reset_counters,
+ .start_counters = xgene_pmu_start_counters,
+ .stop_counters = xgene_pmu_stop_counters,
+};
+
static const struct of_device_id xgene_pmu_of_match[] = {
{ .compatible = "apm,xgene-pmu", .data = &xgene_pmu_data },
{ .compatible = "apm,xgene-pmu-v2", .data = &xgene_pmu_v2_data },
@@ -1245,6 +1811,7 @@ MODULE_DEVICE_TABLE(of, xgene_pmu_of_match);
static const struct acpi_device_id xgene_pmu_acpi_match[] = {
{"APMC0D5B", PCP_PMU_V1},
{"APMC0D5C", PCP_PMU_V2},
+ {"APMC0D83", PCP_PMU_V3},
{},
};
MODULE_DEVICE_TABLE(acpi, xgene_pmu_acpi_match);
@@ -1284,6 +1851,11 @@ static int xgene_pmu_probe(struct platform_device *pdev)
if (version < 0)
return -ENODEV;
+ if (version == PCP_PMU_V3)
+ xgene_pmu->ops = &xgene_pmu_v3_ops;
+ else
+ xgene_pmu->ops = &xgene_pmu_ops;
+
INIT_LIST_HEAD(&xgene_pmu->l3cpmus);
INIT_LIST_HEAD(&xgene_pmu->iobpmus);
INIT_LIST_HEAD(&xgene_pmu->mcbpmus);
@@ -1317,7 +1889,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
raw_spin_lock_init(&xgene_pmu->lock);
/* Check for active MCBs and MCUs */
- rc = xgene_pmu_probe_active_mcb_mcu(xgene_pmu, pdev);
+ rc = xgene_pmu_probe_active_mcb_mcu_l3c(xgene_pmu, pdev);
if (rc) {
dev_warn(&pdev->dev, "Unknown MCB/MCU active status\n");
xgene_pmu->mcb_active_mask = 0x1;
@@ -1342,7 +1914,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
}
/* Enable interrupt */
- xgene_pmu_unmask_int(xgene_pmu);
+ xgene_pmu->ops->unmask_int(xgene_pmu);
return 0;
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index ed4c343d08c4..5429d3795732 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -7,11 +7,24 @@
#include <linux/init.h>
#include <linux/ras.h>
+#include <linux/uuid.h>
#define CREATE_TRACE_POINTS
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
+void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
+ const char *fru_text, const u8 sev, const u8 *err,
+ const u32 len)
+{
+ trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
+}
+
+void log_arm_hw_error(struct cper_sec_proc_arm *err)
+{
+ trace_arm_event(err);
+}
+
static int __init ras_init(void)
{
int rc = 0;
@@ -27,7 +40,8 @@ subsys_initcall(ras_init);
EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
-
+EXPORT_TRACEPOINT_SYMBOL_GPL(non_standard_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(arm_event);
static int __init parse_ras_param(char *str)
{
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 4ee55274f155..45629f4b5402 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -504,7 +504,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
if (&m->list == &kclist_head) {
if (clear_user(buffer, tsz))
return -EFAULT;
- } else if (is_vmalloc_or_module_addr((void *)start)) {
+ } else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
if (copy_to_user(buffer, buf, tsz))
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 720446cb243e..9f26e01186ae 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -1,3 +1,6 @@
+#ifndef GHES_H
+#define GHES_H
+
#include <acpi/apei.h>
#include <acpi/hed.h>
@@ -13,7 +16,10 @@
#define GHES_EXITING 0x0002
struct ghes {
- struct acpi_hest_generic *generic;
+ union {
+ struct acpi_hest_generic *generic;
+ struct acpi_hest_generic_v2 *generic_v2;
+ };
struct acpi_hest_generic_status *estatus;
u64 buffer_paddr;
unsigned long flags;
@@ -70,3 +76,43 @@ static inline void ghes_edac_unregister(struct ghes *ghes)
{
}
#endif
+
+static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
+{
+ return gdata->revision >> 8;
+}
+
+static inline void *acpi_hest_get_payload(struct acpi_hest_generic_data *gdata)
+{
+ if (acpi_hest_get_version(gdata) >= 3)
+ return (void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1);
+
+ return gdata + 1;
+}
+
+static inline int acpi_hest_get_error_length(struct acpi_hest_generic_data *gdata)
+{
+ return ((struct acpi_hest_generic_data *)(gdata))->error_data_length;
+}
+
+static inline int acpi_hest_get_size(struct acpi_hest_generic_data *gdata)
+{
+ if (acpi_hest_get_version(gdata) >= 3)
+ return sizeof(struct acpi_hest_generic_data_v300);
+
+ return sizeof(struct acpi_hest_generic_data);
+}
+
+static inline int acpi_hest_get_record_size(struct acpi_hest_generic_data *gdata)
+{
+ return (acpi_hest_get_size(gdata) + acpi_hest_get_error_length(gdata));
+}
+
+static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata)
+{
+ return (void *)(gdata) + acpi_hest_get_record_size(gdata);
+}
+
+int ghes_notify_sea(void);
+
+#endif /* GHES_H */
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 3ff9acea8616..8379d406ad2e 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -31,7 +31,6 @@ void iort_deregister_domain_token(int trans_id);
struct fwnode_handle *iort_find_domain_token(int trans_id);
#ifdef CONFIG_ACPI_IORT
void acpi_iort_init(void);
-bool iort_node_match(u8 type);
u32 iort_msi_map_rid(struct device *dev, u32 req_id);
struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
void acpi_configure_pmsi_domain(struct device *dev);
@@ -41,7 +40,6 @@ void iort_set_dma_mask(struct device *dev);
const struct iommu_ops *iort_iommu_configure(struct device *dev);
#else
static inline void acpi_iort_init(void) { }
-static inline bool iort_node_match(u8 type) { return false; }
static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
{ return req_id; }
static inline struct irq_domain *iort_get_device_domain(struct device *dev,
diff --git a/include/linux/cper.h b/include/linux/cper.h
index dcacb1a72e26..4c671fc2081e 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -180,6 +180,10 @@ enum {
#define CPER_SEC_PROC_IPF \
UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00, \
0x80, 0xC7, 0x3C, 0x88, 0x81)
+/* Processor Specific: ARM */
+#define CPER_SEC_PROC_ARM \
+ UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05, \
+ 0x1D, 0x5D, 0x46, 0xB0)
/* Platform Memory */
#define CPER_SEC_PLATFORM_MEM \
UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \
@@ -255,6 +259,22 @@ enum {
#define CPER_PCIE_SLOT_SHIFT 3
+#define CPER_ARM_VALID_MPIDR BIT(0)
+#define CPER_ARM_VALID_AFFINITY_LEVEL BIT(1)
+#define CPER_ARM_VALID_RUNNING_STATE BIT(2)
+#define CPER_ARM_VALID_VENDOR_INFO BIT(3)
+
+#define CPER_ARM_INFO_VALID_MULTI_ERR BIT(0)
+#define CPER_ARM_INFO_VALID_FLAGS BIT(1)
+#define CPER_ARM_INFO_VALID_ERR_INFO BIT(2)
+#define CPER_ARM_INFO_VALID_VIRT_ADDR BIT(3)
+#define CPER_ARM_INFO_VALID_PHYSICAL_ADDR BIT(4)
+
+#define CPER_ARM_INFO_FLAGS_FIRST BIT(0)
+#define CPER_ARM_INFO_FLAGS_LAST BIT(1)
+#define CPER_ARM_INFO_FLAGS_PROPAGATED BIT(2)
+#define CPER_ARM_INFO_FLAGS_OVERFLOW BIT(3)
+
/*
* All tables and structs must be byte-packed to match CPER
* specification, since the tables are provided by the system BIOS
@@ -340,6 +360,40 @@ struct cper_ia_proc_ctx {
__u64 mm_reg_addr;
};
+/* ARM Processor Error Section */
+struct cper_sec_proc_arm {
+ __u32 validation_bits;
+ __u16 err_info_num; /* Number of Processor Error Info */
+ __u16 context_info_num; /* Number of Processor Context Info Records*/
+ __u32 section_length;
+ __u8 affinity_level;
+ __u8 reserved[3]; /* must be zero */
+ __u64 mpidr;
+ __u64 midr;
+ __u32 running_state; /* Bit 0 set - Processor running. PSCI = 0 */
+ __u32 psci_state;
+};
+
+/* ARM Processor Error Information Structure */
+struct cper_arm_err_info {
+ __u8 version;
+ __u8 length;
+ __u16 validation_bits;
+ __u8 type;
+ __u16 multiple_error;
+ __u8 flags;
+ __u64 error_info;
+ __u64 virt_fault_addr;
+ __u64 physical_fault_addr;
+};
+
+/* ARM Processor Context Information Structure */
+struct cper_arm_ctx_info {
+ __u16 version;
+ __u16 type;
+ __u32 size;
+};
+
/* Old Memory Error Section UEFI 2.1, 2.2 */
struct cper_sec_mem_err_old {
__u64 validation_bits;
diff --git a/include/linux/ras.h b/include/linux/ras.h
index ffb147185e8d..be5338a35d57 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -2,6 +2,8 @@
#define __RAS_H__
#include <asm/errno.h>
+#include <linux/uuid.h>
+#include <linux/cper.h>
#ifdef CONFIG_DEBUG_FS
int ras_userspace_consumers(void);
@@ -22,4 +24,19 @@ static inline void __init cec_init(void) { }
static inline int cec_add_elem(u64 pfn) { return -ENODEV; }
#endif
+#ifdef CONFIG_RAS
+void log_non_standard_event(const guid_t *sec_type,
+ const guid_t *fru_id, const char *fru_text,
+ const u8 sev, const u8 *err, const u32 len);
+void log_arm_hw_error(struct cper_sec_proc_arm *err);
+#else
+static inline void
+log_non_standard_event(const guid_t *sec_type,
+ const guid_t *fru_id, const char *fru_text,
+ const u8 sev, const u8 *err, const u32 len)
+{ return; }
+static inline void
+log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
+#endif
+
#endif /* __RAS_H__ */
diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index d1defe4ab167..2251e1925ea4 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -18,8 +18,10 @@
#include <uapi/linux/uuid.h>
+#define UUID_SIZE 16
+
typedef struct {
- __u8 b[16];
+ __u8 b[UUID_SIZE];
} uuid_t;
#define UUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1791a12cfa85..429f46fb61e4 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,96 @@ TRACE_EVENT(mc_event,
);
/*
+ * ARM Processor Events Report
+ *
+ * This event is generated when hardware detects an ARM processor error
+ * has occurred. UEFI 2.6 spec section N.2.4.4.
+ */
+TRACE_EVENT(arm_event,
+
+ TP_PROTO(const struct cper_sec_proc_arm *proc),
+
+ TP_ARGS(proc),
+
+ TP_STRUCT__entry(
+ __field(u64, mpidr)
+ __field(u64, midr)
+ __field(u32, running_state)
+ __field(u32, psci_state)
+ __field(u8, affinity)
+ ),
+
+ TP_fast_assign(
+ if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
+ __entry->affinity = proc->affinity_level;
+ else
+ __entry->affinity = ~0;
+ if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
+ __entry->mpidr = proc->mpidr;
+ else
+ __entry->mpidr = 0ULL;
+ __entry->midr = proc->midr;
+ if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
+ __entry->running_state = proc->running_state;
+ __entry->psci_state = proc->psci_state;
+ } else {
+ __entry->running_state = ~0;
+ __entry->psci_state = ~0;
+ }
+ ),
+
+ TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+ "running state: %d; PSCI state: %d",
+ __entry->affinity, __entry->mpidr, __entry->midr,
+ __entry->running_state, __entry->psci_state)
+);
+
+/*
+ * Non-Standard Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(non_standard_event,
+
+ TP_PROTO(const uuid_le *sec_type,
+ const uuid_le *fru_id,
+ const char *fru_text,
+ const u8 sev,
+ const u8 *err,
+ const u32 len),
+
+ TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+ TP_STRUCT__entry(
+ __array(char, sec_type, UUID_SIZE)
+ __array(char, fru_id, UUID_SIZE)
+ __string(fru_text, fru_text)
+ __field(u8, sev)
+ __field(u32, len)
+ __dynamic_array(u8, buf, len)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->sec_type, sec_type, UUID_SIZE);
+ memcpy(__entry->fru_id, fru_id, UUID_SIZE);
+ __assign_str(fru_text, fru_text);
+ __entry->sev = sev;
+ __entry->len = len;
+ memcpy(__get_dynamic_array(buf), err, len);
+ ),
+
+ TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw data:%s",
+ __entry->sev, __entry->sec_type,
+ __entry->fru_id, __get_str(fru_text),
+ __entry->len,
+ __print_hex(__get_dynamic_array(buf), __entry->len))
+);
+
+/*
* PCIe AER Trace event
*
* These events are generated when hardware detects a corrected or
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index e2e5effba2a9..1c44aa35f909 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -29,6 +29,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/virt.h>
+#include <asm/system_misc.h>
#include "trace.h"
@@ -1430,6 +1431,25 @@ out:
kvm_set_pfn_accessed(pfn);
}
+static bool is_abort_sea(unsigned long fault_status)
+{
+ switch (fault_status) {
+ case FSC_SEA:
+ case FSC_SEA_TTW0:
+ case FSC_SEA_TTW1:
+ case FSC_SEA_TTW2:
+ case FSC_SEA_TTW3:
+ case FSC_SECC:
+ case FSC_SECC_TTW0:
+ case FSC_SECC_TTW1:
+ case FSC_SECC_TTW2:
+ case FSC_SECC_TTW3:
+ return true;
+ default:
+ return false;
+ }
+}
+
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
@@ -1452,19 +1472,29 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
gfn_t gfn;
int ret, idx;
+ fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
+
+ fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+
+ /*
+ * The host kernel will handle the synchronous external abort. There
+ * is no need to pass the error into the guest.
+ */
+ if (is_abort_sea(fault_status)) {
+ if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu)))
+ return 1;
+ }
+
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) {
kvm_inject_vabt(vcpu);
return 1;
}
- fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
-
trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
/* Check the stage-2 fault is trans. fault or write fault */
- fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
fault_status != FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",