Diffstat (limited to 'arch/powerpc/kernel')
163 files changed, 11030 insertions, 9847 deletions
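Most of what follows is mechanical renaming: the fsl_booke/FSL_BOOKE and PPC_FSL_BOOK3E names become 85xx and PPC_E500, head objects move from extra-y to obj-y, and KCSAN opt-outs are added next to the existing GCOV/KCOV/UBSAN ones. The single largest hunk is in btext.c, which deletes the file-private 4 KB vga_font[] copy and indexes the shared Sun 8x16 console font instead. Below is a minimal sketch of that lookup, assuming the <linux/font.h> API (font_sun_8x16 with a const void *data member holding 16 bytes per glyph); btext_glyph() is a hypothetical helper name used only for illustration, the patch itself does this inline in draw_byte():

	#include <linux/font.h>

	/* Return the 16-byte bitmap for character c in the shared 8x16 font. */
	static const unsigned char *btext_glyph(unsigned char c)
	{
		unsigned int font_index = c * 16;	/* 16 rows of 1 byte per glyph */

		return (const unsigned char *)font_sun_8x16.data + font_index;
	}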
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/85xx_entry_mapping.S index dedc17fac8f8..dedc17fac8f8 100644 --- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S +++ b/arch/powerpc/kernel/85xx_entry_mapping.S diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c8cf924bf9c0..2f0a2e69c607 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -3,9 +3,6 @@ # Makefile for the linux kernel. # -ifdef CONFIG_PPC64 -CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) -endif ifdef CONFIG_PPC32 CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC @@ -54,18 +51,32 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif -obj-y := cputable.o syscalls.o \ +KCSAN_SANITIZE_early_32.o := n +KCSAN_SANITIZE_cputable.o := n +KCSAN_SANITIZE_btext.o := n +KCSAN_SANITIZE_paca.o := n +KCSAN_SANITIZE_setup_64.o := n + +ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET +# Remove stack protector to avoid triggering unneeded stack canary +# checks due to randomize_kstack_offset. +CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong +CFLAGS_syscall.o += -fno-stack-protector +endif + +obj-y := cputable.o syscalls.o switch.o \ irq.o align.o signal_$(BITS).o pmc.o vdso.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o firmware.o \ + prom_parse.o firmware.o \ hw_breakpoint_constraints.o interrupt.o \ - kdebugfs.o stacktrace.o + kdebugfs.o stacktrace.o syscall.o obj-y += ptrace/ -obj-$(CONFIG_PPC64) += setup_64.o \ +obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\ paca.o nvram_64.o note.o +obj-$(CONFIG_PPC32) += sys_ppc32.o obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32_wrapper.o obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o @@ -73,8 +84,9 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o -obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o +obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o obj-$(CONFIG_PPC64) += vdso64_wrapper.o obj-$(CONFIG_ALTIVEC) += vecemu.o @@ -93,31 +105,28 @@ obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_FA_DUMP) += fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o -ifdef CONFIG_PPC32 -obj-$(CONFIG_E500) += idle_e500.o -endif +obj-$(CONFIG_PPC_85xx) += idle_85xx.o obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o -ifdef CONFIG_FSL_BOOKE -obj-$(CONFIG_HIBERNATION) += swsusp_booke.o +ifdef CONFIG_PPC_85xx +obj-$(CONFIG_HIBERNATION) += swsusp_85xx.o else obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(BITS).o obj-$(CONFIG_44x) += cpu_setup_44x.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o +obj-$(CONFIG_PPC_E500) += cpu_setup_e500.o obj-$(CONFIG_PPC_DOORBELL) += dbell.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -extra-$(CONFIG_PPC64) := head_64.o -extra-$(CONFIG_PPC_BOOK3S_32) := head_book3s_32.o -extra-$(CONFIG_40x) := head_40x.o -extra-$(CONFIG_44x) := head_44x.o -extra-$(CONFIG_FSL_BOOKE) 
:= head_fsl_booke.o -extra-$(CONFIG_PPC_8xx) := head_8xx.o -extra-y += vmlinux.lds +obj-$(CONFIG_PPC64) += head_64.o +obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o +obj-$(CONFIG_44x) += head_44x.o +obj-$(CONFIG_PPC_8xx) += head_8xx.o +obj-$(CONFIG_PPC_85xx) += head_85xx.o +always-$(KBUILD_BUILTIN) += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o @@ -130,6 +139,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o @@ -142,8 +152,6 @@ obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o - obj-y += trace/ ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) @@ -152,12 +160,7 @@ endif obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o -obj-$(CONFIG_PPC64) += $(obj64-y) -obj-$(CONFIG_PPC32) += $(obj32-y) - -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)$(CONFIG_PPC_BOOK3S),) obj-y += ppc_save_regs.o -endif obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o @@ -171,16 +174,16 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n +KCSAN_SANITIZE_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n GCOV_PROFILE_kprobes.o := n KCOV_INSTRUMENT_kprobes.o := n +KCSAN_SANITIZE_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n GCOV_PROFILE_kprobes-ftrace.o := n KCOV_INSTRUMENT_kprobes-ftrace.o := n +KCSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_kprobes-ftrace.o := n -GCOV_PROFILE_syscall_64.o := n -KCOV_INSTRUMENT_syscall_64.o := n -UBSAN_SANITIZE_syscall_64.o := n UBSAN_SANITIZE_vdso.o := n # Necessary for booting with kcov enabled on book3e machines @@ -191,12 +194,17 @@ KCOV_INSTRUMENT_paca.o := n CFLAGS_setup_64.o += -fno-stack-protector CFLAGS_paca.o += -fno-stack-protector -extra-$(CONFIG_PPC_FPU) += fpu.o -extra-$(CONFIG_ALTIVEC) += vector.o -extra-$(CONFIG_PPC64) += entry_64.o -extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o +obj-$(CONFIG_PPC_FPU) += fpu.o +obj-$(CONFIG_ALTIVEC) += vector.o -extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o +obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o +ifdef KBUILD_BUILTIN +always-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check +endif + +obj-$(CONFIG_PPC64) += $(obj64-y) +obj-$(CONFIG_PPC32) += $(obj32-y) quiet_cmd_prom_init_check = PROMCHK $@ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index eec536aef83a..b3048f6d3822 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,12 +54,12 @@ #endif #ifdef CONFIG_PPC32 -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE #include "head_booke.h" #endif #endif -#if defined(CONFIG_PPC_FSL_BOOK3E) +#if defined(CONFIG_PPC_E500) #include "../mm/mmu_decl.h" #endif @@ -72,7 +72,7 @@ #endif #define STACK_PT_REGS_OFFSET(sym, val) \ - DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val)) + DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val)) int main(void) { @@ -167,9 +167,8 @@ int main(void) 
OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr); OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave); OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr); - /* Local pt_regs on stack for Transactional Memory funcs. */ - DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD + - sizeof(struct pt_regs) + 16); + /* Local pt_regs on stack in int frame form, plus 16 bytes for TM */ + DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16); #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); @@ -186,7 +185,9 @@ int main(void) offsetof(struct task_struct, thread_info)); OFFSET(PACASAVEDMSR, paca_struct, saved_msr); OFFSET(PACAR1, paca_struct, saved_r1); +#ifndef CONFIG_PPC_KERNEL_PCREL OFFSET(PACATOC, paca_struct, kernel_toc); +#endif OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); #ifdef CONFIG_PPC_BOOK3S_64 @@ -197,7 +198,7 @@ int main(void) OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled); -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 OFFSET(PACAPGD, paca_struct, pgd); OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd); OFFSET(PACA_EXGEN, paca_struct, exgen); @@ -213,7 +214,7 @@ int main(void) OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next); OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max); OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first); -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ #ifdef CONFIG_PPC_BOOK3S_64 OFFSET(PACA_EXGEN, paca_struct, exgen); @@ -245,10 +246,8 @@ int main(void) OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default); -#ifdef CONFIG_PPC64 OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1); -#endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); #endif OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); @@ -261,7 +260,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); + DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); @@ -335,7 +334,6 @@ int main(void) #endif /* ! 
CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); @@ -379,7 +377,7 @@ int main(void) OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2); OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3); #endif -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry); OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr); OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit); @@ -418,21 +416,18 @@ int main(void) /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets); OFFSET(KVM_SDR1, kvm, arch.sdr1); OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid); OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr); OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1); OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls); OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v); - OFFSET(KVM_RADIX, kvm, arch.radix); OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest); OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr); OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar); OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr); OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty); OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst); - OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested); OFFSET(VCPU_CPU, kvm_vcpu, cpu); OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu); #endif @@ -449,16 +444,12 @@ int main(void) OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); - OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1); - OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires); OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions); OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded); OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded); - OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending); - OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request); OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr); OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra); OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs); @@ -486,8 +477,6 @@ int main(void) OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr); OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop); OFFSET(VCPU_WORT, kvm_vcpu, arch.wort); - OFFSET(VCPU_TID, kvm_vcpu, arch.tid); - OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr); OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr); OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map); OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest); @@ -582,8 +571,6 @@ int main(void) HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state); HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); - HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys); - HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend); @@ -594,9 +581,6 @@ int main(void) HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); - HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]); - HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]); - HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]); HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); @@ -609,7 +593,6 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); 
HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); - DEFINE(IPI_PRIORITY, IPI_PRIORITY); OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); @@ -651,7 +634,7 @@ int main(void) DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); OFFSET(TLBCAM_MAS0, tlbcam, MAS0); OFFSET(TLBCAM_MAS1, tlbcam, MAS1); @@ -672,17 +655,6 @@ int main(void) OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6); #endif -#ifdef CONFIG_KVM_XICS - DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu, - arch.xive_saved_state)); - DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu, - arch.xive_cam_word)); - DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed)); - DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on)); - DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr)); - DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr)); -#endif - #ifdef CONFIG_KVM_EXIT_TIMING OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu); OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl); @@ -700,5 +672,16 @@ int main(void) DEFINE(BPT_SIZE, BPT_SIZE); #endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call); +#endif +#endif + return 0; } diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c index 1bcfca5fdf67..92298d6a3a37 100644 --- a/arch/powerpc/kernel/audit.c +++ b/arch/powerpc/kernel/audit.c @@ -4,6 +4,8 @@ #include <linux/audit.h> #include <asm/unistd.h> +#include "audit_32.h" + static unsigned dir_class[] = { #include <asm-generic/audit_dir_write.h> ~0U @@ -41,7 +43,6 @@ int audit_classify_arch(int arch) int audit_classify_syscall(int abi, unsigned syscall) { #ifdef CONFIG_PPC64 - extern int ppc32_classify_syscall(unsigned); if (abi == AUDIT_ARCH_PPC) return ppc32_classify_syscall(syscall); #endif diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h new file mode 100644 index 000000000000..c6c79c3041ab --- /dev/null +++ b/arch/powerpc/kernel/audit_32.h @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __AUDIT_32_H__ +#define __AUDIT_32_H__ + +extern int ppc32_classify_syscall(unsigned); + +#endif diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 8f69bb07e500..7f63f1cdc6c3 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -8,6 +8,7 @@ #include <linux/string.h> #include <linux/init.h> #include <linux/export.h> +#include <linux/font.h> #include <linux/memblock.h> #include <linux/pgtable.h> #include <linux/of.h> @@ -41,10 +42,6 @@ static unsigned char *logicalDisplayBase __force_data; unsigned long disp_BAT[2] __initdata = {0, 0}; -#define cmapsz (16*256) - -static unsigned char vga_font[cmapsz]; - static int boot_text_mapped __force_data; extern void rmci_on(void); @@ -73,7 +70,7 @@ static inline void rmci_maybe_off(void) * the display during identify_machine() and MMU_Init() * * The display is mapped to virtual address 0xD0000000, rather - * than 1:1, because some some CHRP machines put the frame buffer + * than 1:1, because some CHRP 
machines put the frame buffer * in the region starting at 0xC0000000 (PAGE_OFFSET). * This mapping is temporary and will disappear as soon as the * setup done by MMU_Init() is applied. @@ -235,7 +232,7 @@ int __init btext_find_display(int allow_nonstdout) return rc; for_each_node_by_type(np, "display") { - if (of_get_property(np, "linux,opened", NULL)) { + if (of_property_read_bool(np, "linux,opened")) { printk("trying %pOF ...\n", np); rc = btext_initialize(np); printk("result: %d\n", rc); @@ -407,7 +404,7 @@ static unsigned int expand_bits_16[4] = { }; -static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) +static void draw_byte_32(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xFFFFFFFFUL; @@ -428,7 +425,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_16(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0xFFFFFFFFUL; @@ -446,7 +443,7 @@ static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb) } } -static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) +static inline void draw_byte_8(const unsigned char *font, unsigned int *base, int rb) { int l, bits; int fg = 0x0F0F0F0FUL; @@ -465,7 +462,8 @@ static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb) static noinline void draw_byte(unsigned char c, long locX, long locY) { unsigned char *base = calc_base(locX << 3, locY << 4); - unsigned char *font = &vga_font[((unsigned int)c) * 16]; + unsigned int font_index = c * 16; + const unsigned char *font = font_sun_8x16.data + font_index; int rb = dispDeviceRowBytes; rmci_maybe_on(); @@ -583,349 +581,3 @@ void __init udbg_init_btext(void) */ udbg_putc = btext_drawchar; } - -static unsigned char vga_font[cmapsz] = { -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd, -0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff, -0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe, -0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, -0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00, -0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd, -0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e, -0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30, -0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63, -0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8, -0xf0, 
0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e, -0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, -0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb, -0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6, -0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, -0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0, -0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c, -0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, -0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18, -0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, -0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18, -0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe, -0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18, -0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, -0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, -0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, -0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde, -0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0, -0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c, -0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, -0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c, -0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60, -0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7, -0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, -0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c, -0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c, -0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, -0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18, -0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x18, 
0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30, -0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, -0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, -0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06, -0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60, -0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb, -0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66, -0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60, -0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30, -0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3, -0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18, -0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 
0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, -0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, -0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, -0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06, -0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe, -0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18, -0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6, -0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00, -0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b, -0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c, -0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00, -0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, -0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76, -0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, -0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00, -0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, -0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, -0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18, -0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, -0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00, -0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, -0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, -0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 
0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc, -0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, -0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, -0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0, -0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06, -0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, -0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, -0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36, -0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44, -0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, -0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, -0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, -0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x36, 
0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, -0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, -0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, -0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, -0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, -0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, -0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0, -0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, -0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, -0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0, -0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8, -0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66, -0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, -0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66, -0x66, 
0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60, -0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, -0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18, -0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, -0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, -0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00, -0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c, -0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c, -0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00, -0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -0x00, 0x00, 0x00, 0x00, -}; - diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index f502337dd37d..0fcc463b02e2 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -735,7 +735,7 @@ static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; -static struct kobj_type cache_index_type = { +static const struct kobj_type cache_index_type = { .release = cache_index_release, .sysfs_ops = &cache_index_ops, .default_groups = cache_index_default_groups, diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c index d92ffe4e5dc1..57b38c592b9f 100644 --- a/arch/powerpc/kernel/compat_audit.c +++ b/arch/powerpc/kernel/compat_audit.c @@ -3,6 +3,8 @@ #include <linux/audit_arch.h> #include <asm/unistd.h> +#include "audit_32.h" + unsigned ppc32_dir_class[] = { #include <asm-generic/audit_dir_write.h> ~0U diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index f8b5ff64b604..ab3ca74e6730 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -4,6 +4,8 @@ * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) */ +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/cputable.h> @@ -24,6 +26,15 @@ BEGIN_FTR_SECTION bl __init_fpu_registers END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) bl setup_common_caches + + /* + * This assumes that all cores using __setup_cpu_603 with + * MMU_FTR_USE_HIGH_BATS are G2_LE compatible + */ +BEGIN_MMU_FTR_SECTION + 
bl setup_g2_le_hid2 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) + mtlr r5 blr _GLOBAL(__setup_cpu_604) @@ -81,7 +92,7 @@ _GLOBAL(__setup_cpu_745x) blr /* Enable caches for 603's, 604, 750 & 7400 */ -setup_common_caches: +SYM_FUNC_START_LOCAL(setup_common_caches) mfspr r11,SPRN_HID0 andi. r0,r11,HID0_DCE ori r11,r11,HID0_ICE|HID0_DCE @@ -95,11 +106,12 @@ setup_common_caches: sync isync blr +SYM_FUNC_END(setup_common_caches) /* 604, 604e, 604ev, ... * Enable superscalar execution & branch history table */ -setup_604_hid0: +SYM_FUNC_START_LOCAL(setup_604_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SIED|HID0_BHTE ori r8,r11,HID0_BTCD @@ -110,6 +122,17 @@ setup_604_hid0: sync isync blr +SYM_FUNC_END(setup_604_hid0) + +/* Enable high BATs for G2_LE and derivatives like e300cX */ +SYM_FUNC_START_LOCAL(setup_g2_le_hid2) + mfspr r11,SPRN_HID2_G2_LE + oris r11,r11,HID2_G2_LE_HBE@h + mtspr SPRN_HID2_G2_LE,r11 + sync + isync + blr +SYM_FUNC_END(setup_g2_le_hid2) /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some * erratas we work around here. @@ -125,13 +148,14 @@ setup_604_hid0: * needed once we have applied workaround #5 (though it's * not set by Apple's firmware at least). */ -setup_7400_workarounds: +SYM_FUNC_START_LOCAL(setup_7400_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0207 ble 1f blr -setup_7410_workarounds: +SYM_FUNC_END(setup_7400_workarounds) +SYM_FUNC_START_LOCAL(setup_7410_workarounds) mfpvr r3 rlwinm r3,r3,0,20,31 cmpwi 0,r3,0x0100 @@ -151,6 +175,7 @@ setup_7410_workarounds: sync isync blr +SYM_FUNC_END(setup_7410_workarounds) /* 740/750/7400/7410 * Enable Store Gathering (SGE), Address Broadcast (ABE), @@ -158,7 +183,7 @@ setup_7410_workarounds: * Dynamic Power Management (DPM), Speculative (SPD) * Clear Instruction cache throttling (ICTC) */ -setup_750_7400_hid0: +SYM_FUNC_START_LOCAL(setup_750_7400_hid0) mfspr r11,SPRN_HID0 ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC oris r11,r11,HID0_DPM@h @@ -177,12 +202,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_750_7400_hid0) /* 750cx specific * Looks like we have to disable NAP feature for some PLL settings... * (waiting for confirmation) */ -setup_750cx: +SYM_FUNC_START_LOCAL(setup_750cx) mfspr r10, SPRN_HID1 rlwinm r10,r10,4,28,31 cmpwi cr0,r10,7 @@ -196,11 +222,13 @@ setup_750cx: andc r6,r6,r7 stw r6,CPU_SPEC_FEATURES(r4) blr +SYM_FUNC_END(setup_750cx) /* 750fx specific */ -setup_750fx: +SYM_FUNC_START_LOCAL(setup_750fx) blr +SYM_FUNC_END(setup_750fx) /* MPC 745x * Enable Store Gathering (SGE), Branch Folding (FOLD) @@ -212,7 +240,7 @@ setup_750fx: * Clear Instruction cache throttling (ICTC) * Enable L2 HW prefetch */ -setup_745x_specifics: +SYM_FUNC_START_LOCAL(setup_745x_specifics) /* We check for the presence of an L3 cache setup by * the firmware. If any, we disable NAP capability as * it's known to be bogus on rev 2.1 and earlier @@ -270,6 +298,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM) sync isync blr +SYM_FUNC_END(setup_745x_specifics) /* * Initialize the FPU registers. This is needed to work around an errata @@ -372,7 +401,7 @@ _GLOBAL(__save_cpu_setup) andi. 
r3,r3,0xff00 cmpwi cr0,r3,0x0200 bne 1f - mfspr r4,SPRN_HID2 + mfspr r4,SPRN_HID2_750FX stw r4,CS_HID2(r5) 1: mtcr r7 @@ -467,7 +496,7 @@ _GLOBAL(__restore_cpu_setup) bne 4f lwz r4,CS_HID2(r5) rlwinm r4,r4,0,19,17 - mtspr SPRN_HID2,r4 + mtspr SPRN_HID2_750FX,r4 sync 4: lwz r4,CS_HID1(r5) @@ -485,4 +514,3 @@ _GLOBAL(__restore_cpu_setup) mtcr r7 blr _ASM_NOKPROBE_SYMBOL(__restore_cpu_setup) - diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_e500.S index 4bf33f1b4193..077cfccc3461 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_e500.S @@ -8,11 +8,13 @@ * Benjamin Herrenschmidt <benh@kernel.crashing.org> */ +#include <linux/linkage.h> + #include <asm/page.h> #include <asm/processor.h> #include <asm/cputable.h> #include <asm/ppc_asm.h> -#include <asm/nohash/mmu-book3e.h> +#include <asm/nohash/mmu-e500.h> #include <asm/asm-offsets.h> #include <asm/mpc85xx.h> @@ -108,7 +110,7 @@ _GLOBAL(__setup_cpu_e6500) #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 #ifndef CONFIG_PPC_E500MC _GLOBAL(__setup_cpu_e500v1) _GLOBAL(__setup_cpu_e500v2) @@ -156,7 +158,7 @@ _GLOBAL(__setup_cpu_e5500) mtlr r5 blr #endif /* CONFIG_PPC_E500MC */ -#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC_E500 */ #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BOOK3E_64 @@ -274,7 +276,7 @@ _GLOBAL(flush_dcache_L1) blr -has_L2_cache: +SYM_FUNC_START_LOCAL(has_L2_cache) /* skip L2 cache on P2040/P2040E as they have no L2 cache */ mfspr r3, SPRN_SVR /* shift right by 8 bits and clear E bit of SVR */ @@ -290,9 +292,10 @@ has_L2_cache: 1: li r3, 0 blr +SYM_FUNC_END(has_L2_cache) /* flush backside L2 cache */ -flush_backside_L2_cache: +SYM_FUNC_START_LOCAL(flush_backside_L2_cache) mflr r10 bl has_L2_cache mtlr r10 @@ -313,6 +316,7 @@ flush_backside_L2_cache: bne 1b 2: blr +SYM_FUNC_END(flush_backside_L2_cache) _GLOBAL(cpu_down_flush_e500v2) mflr r0 diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 3dc61e203f37..98bd4e6c1770 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -11,7 +11,7 @@ #include <asm/synch.h> #include <linux/bitops.h> #include <asm/cputable.h> -#include <asm/cpu_setup_power.h> +#include <asm/cpu_setup.h> /* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */ static bool init_hvmode_206(struct cpu_spec *t) @@ -126,6 +126,12 @@ static void init_PMU_ISA31(void) mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } +static void init_DEXCR(void) +{ + mtspr(SPRN_DEXCR, DEXCR_INIT); + mtspr(SPRN_HASHKEYR, 0); +} + /* * Note that we can be called twice of pseudo-PVRs. * The parameter offset is not used. 
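The DEXCR change to cpu_setup_power.c is split over three hunks: the helper added just above and calls from the two Power10 entry points below. A consolidated sketch for orientation, assuming the kernel's mtspr() accessor and the SPRN_DEXCR, SPRN_HASHKEYR and DEXCR_INIT definitions from the powerpc headers (DEXCR is the ISA 3.1B dynamic execution control register; HASHKEYR seeds the key used by the hashst/hashchk ROP-protection instructions):

	/* As added above: give DEXCR and the hash key a known boot-time state. */
	static void init_DEXCR(void)
	{
		mtspr(SPRN_DEXCR, DEXCR_INIT);	/* baseline execution controls */
		mtspr(SPRN_HASHKEYR, 0);	/* deterministic hashst/hashchk key */
	}

	/* The hunks below call init_DEXCR() from both __setup_cpu_power10()
	 * and __restore_cpu_power10(), ahead of their hypervisor-mode checks,
	 * so the state is reset on first boot and again when a CPU wakes. */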
@@ -241,6 +247,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) init_FSCR_power10(); init_PMU(); init_PMU_ISA31(); + init_DEXCR(); if (!init_hvmode_206(t)) return; @@ -263,6 +270,7 @@ void __restore_cpu_power10(void) init_FSCR_power10(); init_PMU(); init_PMU_ISA31(); + init_DEXCR(); msr = mfmsr(); if (!(msr & MSR_HV)) diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h new file mode 100644 index 000000000000..5ea14605bb41 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifdef CONFIG_PPC_47x +#include "cpu_specs_47x.h" +#elif defined(CONFIG_44x) +#include "cpu_specs_44x.h" +#endif + +#ifdef CONFIG_PPC_8xx +#include "cpu_specs_8xx.h" +#endif + +#ifdef CONFIG_PPC_E500MC +#include "cpu_specs_e500mc.h" +#elif defined(CONFIG_PPC_85xx) +#include "cpu_specs_85xx.h" +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 +#include "cpu_specs_book3s_32.h" +#endif + +#ifdef CONFIG_PPC_BOOK3S_64 +#include "cpu_specs_book3s_64.h" +#endif diff --git a/arch/powerpc/kernel/cpu_specs_44x.h b/arch/powerpc/kernel/cpu_specs_44x.h new file mode 100644 index 000000000000..69c4cdc0cdee --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_44x.h @@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) + +static struct cpu_spec cpu_specs[] __initdata = { + { + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000850, + .cpu_name = "440GR Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000858, + .cpu_name = "440EP Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { + .pvr_mask = 0xf0000fff, + .pvr_value = 0x400008d3, + .cpu_name = "440GR Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000ff7, + .pvr_value = 0x400008d4, + .cpu_name = "440EP Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x400008db, + .cpu_name = "440EP Rev. 
B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440ep, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* 440GRX */ + .pvr_mask = 0xf0000ffb, + .pvr_value = 0x200008D0, + .cpu_name = "440GRX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440grx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */ + .pvr_mask = 0xf0000ffb, + .pvr_value = 0x200008D8, + .cpu_name = "440EPX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440epx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GP Rev. B */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000440, + .cpu_name = "440GP Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440gp", + }, + { /* 440GP Rev. C */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x40000481, + .cpu_name = "440GP Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440gp", + }, + { /* 440GX Rev. A */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000850, + .cpu_name = "440GX Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. B */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000851, + .cpu_name = "440GX Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. C */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000892, + .cpu_name = "440GX Rev. C", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440GX Rev. F */ + .pvr_mask = 0xf0000fff, + .pvr_value = 0x50000894, + .cpu_name = "440GX Rev. F", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440gx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440SP Rev. A */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53200891, + .cpu_name = "440SP Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + }, + { /* 440SPe Rev. 
A */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400890, + .cpu_name = "440SPe Rev. A", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440spe, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 440SPe Rev. B */ + .pvr_mask = 0xfff00fff, + .pvr_value = 0x53400891, + .cpu_name = "440SPe Rev. B", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440spe, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460EX */ + .pvr_mask = 0xffff0006, + .pvr_value = 0x13020002, + .cpu_name = "460EX", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460ex, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460EX Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020004, + .cpu_name = "460EX Rev. B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460ex, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460GT */ + .pvr_mask = 0xffff0006, + .pvr_value = 0x13020000, + .cpu_name = "460GT", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460GT Rev B */ + .pvr_mask = 0xffff0007, + .pvr_value = 0x13020005, + .cpu_name = "460GT Rev. 
B", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 460SX */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x13541800, + .cpu_name = "460SX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460sx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* 464 in APM821xx */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x12C41C80, + .cpu_name = "APM821XX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_apm821xx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic 44x PPC)", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_4xx, + .platform = "ppc440", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_47x.h b/arch/powerpc/kernel/cpu_specs_47x.h new file mode 100644 index 000000000000..3143cd504a51 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_47x.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) + +static struct cpu_spec cpu_specs[] __initdata = { + { /* 476 DD2 core */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x11a52080, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476fpe */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x7ff50000, + .cpu_name = "476fpe", + .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476 iss */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00050000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* 476 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x11a50000, + .cpu_name = "476", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | + MMU_FTR_LOCK_BCAST_INVAL, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = 
"(generic 47x PPC)", + .cpu_features = CPU_FTRS_47X, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_47x, + .icache_bsize = 32, + .dcache_bsize = 128, + .machine_check = machine_check_47x, + .platform = "ppc470", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_85xx.h b/arch/powerpc/kernel/cpu_specs_85xx.h new file mode 100644 index 000000000000..aaae202c1a89 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_85xx.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) + +static struct cpu_spec cpu_specs[] __initdata = { + { /* e500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80200000, + .cpu_name = "e500", + .cpu_features = CPU_FTRS_E500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e500v1, + .machine_check = machine_check_e500, + .platform = "ppc8540", + }, + { /* e500v2 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80210000, + .cpu_name = "e500v2", + .cpu_features = CPU_FTRS_E500_2, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP | + PPC_FEATURE_HAS_EFP_DOUBLE_COMP, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e500v2, + .machine_check = machine_check_e500, + .platform = "ppc8548", + .cpu_down_flush = cpu_down_flush_e500v2, + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic E500 PPC)", + .cpu_features = CPU_FTRS_E500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP | + PPC_FEATURE_HAS_EFP_SINGLE_COMP, + .mmu_features = MMU_FTR_TYPE_FSL_E, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_e500, + .platform = "powerpc", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_8xx.h b/arch/powerpc/kernel/cpu_specs_8xx.h new file mode 100644 index 000000000000..93ddbc202ba3 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_8xx.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + */ + +static struct cpu_spec cpu_specs[] __initdata = { + { /* 8xx */ + .pvr_mask = 0xffff0000, + .pvr_value = PVR_8xx, + .cpu_name = "8xx", + /* + * CPU_FTR_MAYBE_CAN_DOZE is possible, + * if the 8xx code is there.... + */ + .cpu_features = CPU_FTRS_8XX, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_TYPE_8xx, + .icache_bsize = 16, + .dcache_bsize = 16, + .machine_check = machine_check_8xx, + .platform = "ppc823", + }, +}; diff --git a/arch/powerpc/kernel/cpu_specs_book3s_32.h b/arch/powerpc/kernel/cpu_specs_book3s_32.h new file mode 100644 index 000000000000..3714634d194a --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_book3s_32.h @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. 
Herrenschmidt (benh@kernel.crashing.org) + */ + +#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ + PPC_FEATURE_HAS_MMU) + +static struct cpu_spec cpu_specs[] __initdata = { +#ifdef CONFIG_PPC_BOOK3S_603 + { /* 603 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00030000, + .cpu_name = "603", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603e */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00060000, + .cpu_name = "603e", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 603ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00070000, + .cpu_name = "603ev", + .cpu_features = CPU_FTRS_603, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* 82xx (8240, 8245, 8260 are all 603e cores) */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00810000, + .cpu_name = "82xx", + .cpu_features = CPU_FTRS_82XX, + .cpu_user_features = COMMON_USER, + .mmu_features = 0, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, + { /* All G2_LE (603e core, plus some) have the same pvr */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00820000, + .cpu_name = "G2_LE", + .cpu_features = CPU_FTRS_G2_LE, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, +#ifdef CONFIG_PPC_83xx + { /* e300c1 (a 603e core, plus some) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00830000, + .cpu_name = "e300c1", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .platform = "ppc603", + }, + { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00840000, + .cpu_name = "e300c2", + .cpu_features = CPU_FTRS_E300C2, + .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, + .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .platform = "ppc603", + }, + { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00850000, + .cpu_name = "e300c3", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .num_pmcs = 4, + .platform = "ppc603", + }, + { /* e300c4 (e300c1, plus one IU) */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00860000, + .cpu_name = "e300c4", + .cpu_features = CPU_FTRS_E300, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_USE_HIGH_BATS | 
MMU_FTR_NEED_DTLB_SW_LRU, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_83xx, + .num_pmcs = 4, + .platform = "ppc603", + }, +#endif +#endif /* CONFIG_PPC_BOOK3S_603 */ +#ifdef CONFIG_PPC_BOOK3S_604 + { /* 604 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00040000, + .cpu_name = "604", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 2, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604e */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x00090000, + .cpu_name = "604e", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604r */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00090000, + .cpu_name = "604r", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 604ev */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x000a0000, + .cpu_name = "604ev", + .cpu_features = CPU_FTRS_604, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_604, + .machine_check = machine_check_generic, + .platform = "ppc604", + }, + { /* 740/750 (0x4202, don't support TAU ?) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00084202, + .cpu_name = "740/750", + .cpu_features = CPU_FTRS_740_NOTAU, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CX (80100 and 8010x?) 
*/ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00080100, + .cpu_name = "750CX", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CX (82201 and 82202) */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00082200, + .cpu_name = "750CX", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CXe (82214) */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x00082210, + .cpu_name = "750CXe", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CXe "Gekko" (83214) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00083214, + .cpu_name = "750CXe", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750cx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750CL (and "Broadway") */ + .pvr_mask = 0xfffff0e0, + .pvr_value = 0x00087000, + .cpu_name = "750CL", + .cpu_features = CPU_FTRS_750CL, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 745/755 */ + .pvr_mask = 0xfffff000, + .pvr_value = 0x00083000, + .cpu_name = "745/755", + .cpu_features = CPU_FTRS_750, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750FX rev 1.x */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x70000100, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX1, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750FX rev 2.0 must disable HID0[DPM] */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x70000200, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX2, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750FX (All revs except 2.0) */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x70000000, + .cpu_name = "750FX", + .cpu_features = CPU_FTRS_750FX, + .cpu_user_features 
= COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750fx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 750GX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x70020000, + .cpu_name = "750GX", + .cpu_features = CPU_FTRS_750GX, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750fx, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 740/750 (L2CR bit need fixup for 740) */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00080000, + .cpu_name = "740/750", + .cpu_features = CPU_FTRS_740, + .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_750, + .machine_check = machine_check_generic, + .platform = "ppc750", + }, + { /* 7400 rev 1.1 ? (no TAU) */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x000c1101, + .cpu_name = "7400 (1.1)", + .cpu_features = CPU_FTRS_7400_NOTAU, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7400, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7400 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x000c0000, + .cpu_name = "7400", + .cpu_features = CPU_FTRS_7400, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7400, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7410 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x800c0000, + .cpu_name = "7410", + .cpu_features = CPU_FTRS_7400, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 4, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_7410, + .machine_check = machine_check_generic, + .platform = "ppc7400", + }, + { /* 7450 2.0 - no doze/nap */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80000200, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_20, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7450 2.1 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80000201, + .cpu_name = "7450", + .cpu_features = CPU_FTRS_7450_21, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7450 2.3 and newer */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80000000, + .cpu_name = "7450", + 
.cpu_features = CPU_FTRS_7450_23, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 rev 1.x */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x80010100, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455_1, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 rev 2.0 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80010200, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455_20, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7455 others */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80010000, + .cpu_name = "7455", + .cpu_features = CPU_FTRS_7455, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.0 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80020100, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447_10, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.1 */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x80020101, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447_10, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447/7457 Rev 1.2 and later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80020000, + .cpu_name = "7447/7457", + .cpu_features = CPU_FTRS_7447, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7447A */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80030000, + .cpu_name = "7447A", + .cpu_features = CPU_FTRS_7447A, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, 
+ .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* 7448 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80040000, + .cpu_name = "7448", + .cpu_features = CPU_FTRS_7448, + .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_PPC_LE, + .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS, + .icache_bsize = 32, + .dcache_bsize = 32, + .num_pmcs = 6, + .pmc_type = PPC_PMC_G4, + .cpu_setup = __setup_cpu_745x, + .machine_check = machine_check_generic, + .platform = "ppc7450", + }, + { /* default match, we assume split I/D cache & TB (non-601)... */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "(generic PPC)", + .cpu_features = CPU_FTRS_CLASSIC32, + .cpu_user_features = COMMON_USER, + .mmu_features = MMU_FTR_HPTE_TABLE, + .icache_bsize = 32, + .dcache_bsize = 32, + .machine_check = machine_check_generic, + .platform = "ppc603", + }, +#endif /* CONFIG_PPC_BOOK3S_604 */ +}; diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h new file mode 100644 index 000000000000..98d4274a1b6b --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h @@ -0,0 +1,530 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> + */ + +/* NOTE: + * Unlike ppc32, ppc64 will only call cpu_setup() for the boot CPU, it's + * the responsibility of the appropriate CPU save/restore functions to + * eventually copy these settings over. Those save/restore aren't yet + * part of the cputable though. That has to be fixed for both ppc32 + * and ppc64 + */ +#define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ + PPC_FEATURE_HAS_MMU | PPC_FEATURE_64) +#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) +#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR) +#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_PSERIES_PERFMON_COMPAT) +#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ + PPC_FEATURE2_HTM_COMP | \ + PPC_FEATURE2_HTM_NOSC_COMP | \ + PPC_FEATURE2_DSCR | \ + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) +#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ + PPC_FEATURE_TRUE_LE | \ + PPC_FEATURE_HAS_ALTIVEC_COMP) +#define COMMON_USER_POWER9 COMMON_USER_POWER8 +#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ + PPC_FEATURE2_ARCH_3_00 | \ + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV) +#define COMMON_USER_POWER10 COMMON_USER_POWER9 +#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \ + PPC_FEATURE2_MMA | \ + PPC_FEATURE2_ARCH_3_00 
| \ + PPC_FEATURE2_HAS_IEEE128 | \ + PPC_FEATURE2_DARN | \ + PPC_FEATURE2_SCV | \ + PPC_FEATURE2_ARCH_2_07 | \ + PPC_FEATURE2_DSCR | \ + PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ + PPC_FEATURE2_VEC_CRYPTO) + +#define COMMON_USER_POWER11 COMMON_USER_POWER10 +#define COMMON_USER2_POWER11 COMMON_USER2_POWER10 + +static struct cpu_spec cpu_specs[] __initdata = { + { /* PPC970 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00390000, + .cpu_name = "PPC970", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970FX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003c0000, + .cpu_name = "PPC970FX", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x00440100, + .cpu_name = "PPC970MP", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970MP */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00440000, + .cpu_name = "PPC970MP", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970MP, + .cpu_restore = __restore_cpu_ppc970, + .platform = "ppc970", + }, + { /* PPC970GX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00450000, + .cpu_name = "PPC970GX", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP, + .mmu_features = MMU_FTRS_PPC970, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_ppc970, + .platform = "ppc970", + }, + { /* Power5 GR */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003a0000, + .cpu_name = "POWER5 (gr)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power5", + }, + { /* Power5++ */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x003b0300, + .cpu_name = "POWER5+ (gs)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .platform = "power5+", + }, + { /* Power5 GS */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003b0000, + .cpu_name = "POWER5+ (gs)", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power5+", + }, + { /* POWER6 in P5+ mode; 
2.04-compliant processor */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000001, + .cpu_name = "POWER5+", + .cpu_features = CPU_FTRS_POWER5, + .cpu_user_features = COMMON_USER_POWER5_PLUS, + .mmu_features = MMU_FTRS_POWER5, + .icache_bsize = 128, + .dcache_bsize = 128, + .platform = "power5+", + }, + { /* Power6 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003e0000, + .cpu_name = "POWER6 (raw)", + .cpu_features = CPU_FTRS_POWER6, + .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT, + .mmu_features = MMU_FTRS_POWER6, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power6x", + }, + { /* 2.05-compliant processor, i.e. Power6 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000002, + .cpu_name = "POWER6 (architected)", + .cpu_features = CPU_FTRS_POWER6, + .cpu_user_features = COMMON_USER_POWER6, + .mmu_features = MMU_FTRS_POWER6, + .icache_bsize = 128, + .dcache_bsize = 128, + .platform = "power6", + }, + { /* 2.06-compliant processor, i.e. Power7 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000003, + .cpu_name = "POWER7 (architected)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .machine_check_early = __machine_check_early_realmode_p7, + .platform = "power7", + }, + { /* 2.07-compliant processor, i.e. Power8 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000004, + .cpu_name = "POWER8 (architected)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* 2.07-compliant processor, HeXin C2000 processor */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00660000, + .cpu_name = "HX-C2000", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* 3.00-compliant processor, i.e. Power9 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000005, + .cpu_name = "POWER9 (architected)", + .cpu_features = CPU_FTRS_POWER9, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .platform = "power9", + }, + { /* 3.1-compliant processor, i.e. Power10 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000006, + .cpu_name = "POWER10 (architected)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .platform = "power10", + }, + { /* 3.1-compliant processor, i.e. 
Power11 "architected" mode */ + .pvr_mask = 0xffffffff, + .pvr_value = 0x0f000007, + .cpu_name = "Power11 (architected)", + .cpu_features = CPU_FTRS_POWER11, + .cpu_user_features = COMMON_USER_POWER11, + .cpu_user_features2 = COMMON_USER2_POWER11, + .mmu_features = MMU_FTRS_POWER11, + .icache_bsize = 128, + .dcache_bsize = 128, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .platform = "power11", + }, + { /* Power7 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x003f0000, + .cpu_name = "POWER7 (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .machine_check_early = __machine_check_early_realmode_p7, + .platform = "power7", + }, + { /* Power7+ */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004A0000, + .cpu_name = "POWER7+ (raw)", + .cpu_features = CPU_FTRS_POWER7, + .cpu_user_features = COMMON_USER_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, + .mmu_features = MMU_FTRS_POWER7, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power7, + .cpu_restore = __restore_cpu_power7, + .machine_check_early = __machine_check_early_realmode_p7, + .platform = "power7+", + }, + { /* Power8E */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004b0000, + .cpu_name = "POWER8E (raw)", + .cpu_features = CPU_FTRS_POWER8E, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* Power8NVL */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004c0000, + .cpu_name = "POWER8NVL (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* Power8 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004d0000, + .cpu_name = "POWER8 (raw)", + .cpu_features = CPU_FTRS_POWER8, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, + { /* Power9 DD2.0 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0200, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_0, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD 2.1 */ + 
.pvr_mask = 0xffffefff, + .pvr_value = 0x004e0201, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_1, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD2.2 */ + .pvr_mask = 0xffffefff, + .pvr_value = 0x004e0202, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_2, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power9 DD2.3 or later */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x004e0000, + .cpu_name = "POWER9 (raw)", + .cpu_features = CPU_FTRS_POWER9_DD2_3, + .cpu_user_features = COMMON_USER_POWER9, + .cpu_user_features2 = COMMON_USER2_POWER9, + .mmu_features = MMU_FTRS_POWER9, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power9, + .cpu_restore = __restore_cpu_power9, + .machine_check_early = __machine_check_early_realmode_p9, + .platform = "power9", + }, + { /* Power10 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00800000, + .cpu_name = "POWER10 (raw)", + .cpu_features = CPU_FTRS_POWER10, + .cpu_user_features = COMMON_USER_POWER10, + .cpu_user_features2 = COMMON_USER2_POWER10, + .mmu_features = MMU_FTRS_POWER10, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power10", + }, + { /* Power11 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00820000, + .cpu_name = "Power11 (raw)", + .cpu_features = CPU_FTRS_POWER11, + .cpu_user_features = COMMON_USER_POWER11, + .cpu_user_features2 = COMMON_USER2_POWER11, + .mmu_features = MMU_FTRS_POWER11, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .cpu_setup = __setup_cpu_power10, + .cpu_restore = __restore_cpu_power10, + .machine_check_early = __machine_check_early_realmode_p10, + .platform = "power11", + }, + { /* Cell Broadband Engine */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00700000, + .cpu_name = "Cell Broadband Engine", + .cpu_features = CPU_FTRS_CELL, + .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL | + PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT, + .mmu_features = MMU_FTRS_CELL, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 4, + .pmc_type = PPC_PMC_IBM, + .platform = "ppc-cell-be", + }, + { /* PA Semi PA6T */ + .pvr_mask = 0x7fff0000, + .pvr_value = 0x00900000, + .cpu_name = "PA6T", + .cpu_features = CPU_FTRS_PA6T, + .cpu_user_features = COMMON_USER_PA6T, + .mmu_features = MMU_FTRS_PA6T, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 6, + .pmc_type = PPC_PMC_PA6T, + .cpu_setup = __setup_cpu_pa6t, + .cpu_restore = __restore_cpu_pa6t, + .platform = "pa6t", + }, + { /* default match */ + .pvr_mask = 0x00000000, + .pvr_value = 0x00000000, + .cpu_name = "POWER5 (compatible)", + .cpu_features = 
CPU_FTRS_COMPATIBLE, + .cpu_user_features = COMMON_USER_PPC64, + .mmu_features = MMU_FTRS_POWER, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .platform = "power5", + } +}; diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h new file mode 100644 index 000000000000..2ae8e9a7b461 --- /dev/null +++ b/arch/powerpc/kernel/cpu_specs_e500mc.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com> + */ + +#ifdef CONFIG_PPC64 +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \ + PPC_FEATURE_BOOKE) +#else +#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ + PPC_FEATURE_BOOKE) +#endif + +static struct cpu_spec cpu_specs[] __initdata = { +#ifdef CONFIG_PPC32 + { /* e500mc */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80230000, + .cpu_name = "e500mc", + .cpu_features = CPU_FTRS_E500MC, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e500mc, + .machine_check = machine_check_e500mc, + .platform = "ppce500mc", + .cpu_down_flush = cpu_down_flush_e500mc, + }, +#endif /* CONFIG_PPC32 */ + { /* e5500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80240000, + .cpu_name = "e5500", + .cpu_features = CPU_FTRS_E5500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .cpu_setup = __setup_cpu_e5500, +#ifndef CONFIG_PPC32 + .cpu_restore = __restore_cpu_e5500, +#endif + .machine_check = machine_check_e500mc, + .platform = "ppce5500", + .cpu_down_flush = cpu_down_flush_e5500, + }, + { /* e6500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80400000, + .cpu_name = "e6500", + .cpu_features = CPU_FTRS_E6500, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .cpu_user_features2 = PPC_FEATURE2_ISEL, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 6, + .cpu_setup = __setup_cpu_e6500, +#ifndef CONFIG_PPC32 + .cpu_restore = __restore_cpu_e6500, +#endif + .machine_check = machine_check_e500mc, + .platform = "ppce6500", + .cpu_down_flush = cpu_down_flush_e6500, + }, +}; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index a5dbfccd2047..6f6801da9dc1 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -18,1981 +18,17 @@ #include <asm/mce.h> #include <asm/mmu.h> #include <asm/setup.h> +#include <asm/cpu_setup.h> -static struct cpu_spec the_cpu_spec __read_mostly; +static struct cpu_spec the_cpu_spec __ro_after_init; -struct cpu_spec* cur_cpu_spec __read_mostly = NULL; +struct cpu_spec *cur_cpu_spec __ro_after_init = NULL; EXPORT_SYMBOL(cur_cpu_spec); /* The platform string corresponding to the real PVR */ const char *powerpc_base_platform; -/* NOTE: - * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's - * the responsibility of the appropriate CPU save/restore 
functions to - * eventually copy these settings over. Those save/restore aren't yet - * part of the cputable though. That has to be fixed for both ppc32 - * and ppc64 - */ -#ifdef CONFIG_PPC32 -extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec); -extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec); -extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_750cx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_750fx(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); -#endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PPC64 -#include <asm/cpu_setup_power.h> -extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_pa6t(void); -extern void __restore_cpu_ppc970(void); -extern long __machine_check_early_realmode_p7(struct pt_regs *regs); -extern long __machine_check_early_realmode_p8(struct pt_regs *regs); -extern long __machine_check_early_realmode_p9(struct pt_regs *regs); -#endif /* CONFIG_PPC64 */ -#if defined(CONFIG_E500) -extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_e6500(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_e5500(void); -extern void __restore_cpu_e6500(void); -#endif /* CONFIG_E500 */ - -/* This table only contains "desktop" CPUs, it need to be filled with embedded - * ones as well... 
- */ -#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \ - PPC_FEATURE_HAS_MMU) -#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64) -#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) -#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) -#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) -#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_PSERIES_PERFMON_COMPAT) -#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_PSERIES_PERFMON_COMPAT) -#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR) -#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\ - PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_PSERIES_PERFMON_COMPAT) -#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \ - PPC_FEATURE2_HTM_COMP | \ - PPC_FEATURE2_HTM_NOSC_COMP | \ - PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ - PPC_FEATURE2_VEC_CRYPTO) -#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\ - PPC_FEATURE_TRUE_LE | \ - PPC_FEATURE_HAS_ALTIVEC_COMP) -#define COMMON_USER_POWER9 COMMON_USER_POWER8 -#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \ - PPC_FEATURE2_ARCH_3_00 | \ - PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN | \ - PPC_FEATURE2_SCV) -#define COMMON_USER_POWER10 COMMON_USER_POWER9 -#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \ - PPC_FEATURE2_MMA | \ - PPC_FEATURE2_ARCH_3_00 | \ - PPC_FEATURE2_HAS_IEEE128 | \ - PPC_FEATURE2_DARN | \ - PPC_FEATURE2_SCV | \ - PPC_FEATURE2_ARCH_2_07 | \ - PPC_FEATURE2_DSCR | \ - PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \ - PPC_FEATURE2_VEC_CRYPTO) - -#ifdef CONFIG_PPC_BOOK3E_64 -#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE) -#else -#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ - PPC_FEATURE_BOOKE) -#endif - -static struct cpu_spec __initdata cpu_specs[] = { -#ifdef CONFIG_PPC_BOOK3S_64 - { /* PPC970 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x00390000, - .cpu_name = "PPC970", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_ppc970, - .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", - .platform = "ppc970", - }, - { /* PPC970FX */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x003c0000, - .cpu_name = "PPC970FX", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - .pmc_type = PPC_PMC_IBM, - .cpu_setup = __setup_cpu_ppc970, - .cpu_restore = __restore_cpu_ppc970, - .oprofile_cpu_type = "ppc64/970", - .platform = "ppc970", - }, - { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */ - .pvr_mask = 0xffffffff, - .pvr_value = 0x00440100, - .cpu_name = "PPC970MP", - .cpu_features = CPU_FTRS_PPC970, - .cpu_user_features = COMMON_USER_POWER4 | - PPC_FEATURE_HAS_ALTIVEC_COMP, - .mmu_features = MMU_FTRS_PPC970, - .icache_bsize = 128, - .dcache_bsize = 128, - .num_pmcs = 8, - 
.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_ppc970,
-		.cpu_restore = __restore_cpu_ppc970,
-		.oprofile_cpu_type = "ppc64/970MP",
-		.platform = "ppc970",
-	},
-	{ /* PPC970MP */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00440000,
-		.cpu_name = "PPC970MP",
-		.cpu_features = CPU_FTRS_PPC970,
-		.cpu_user_features = COMMON_USER_POWER4 |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features = MMU_FTRS_PPC970,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 8,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_ppc970MP,
-		.cpu_restore = __restore_cpu_ppc970,
-		.oprofile_cpu_type = "ppc64/970MP",
-		.platform = "ppc970",
-	},
-	{ /* PPC970GX */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00450000,
-		.cpu_name = "PPC970GX",
-		.cpu_features = CPU_FTRS_PPC970,
-		.cpu_user_features = COMMON_USER_POWER4 |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.mmu_features = MMU_FTRS_PPC970,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 8,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_ppc970,
-		.oprofile_cpu_type = "ppc64/970",
-		.platform = "ppc970",
-	},
-	{ /* Power5 GR */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x003a0000,
-		.cpu_name = "POWER5 (gr)",
-		.cpu_features = CPU_FTRS_POWER5,
-		.cpu_user_features = COMMON_USER_POWER5,
-		.mmu_features = MMU_FTRS_POWER5,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power5",
-		.platform = "power5",
-	},
-	{ /* Power5++ */
-		.pvr_mask = 0xffffff00,
-		.pvr_value = 0x003b0300,
-		.cpu_name = "POWER5+ (gs)",
-		.cpu_features = CPU_FTRS_POWER5,
-		.cpu_user_features = COMMON_USER_POWER5_PLUS,
-		.mmu_features = MMU_FTRS_POWER5,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.oprofile_cpu_type = "ppc64/power5++",
-		.platform = "power5+",
-	},
-	{ /* Power5 GS */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x003b0000,
-		.cpu_name = "POWER5+ (gs)",
-		.cpu_features = CPU_FTRS_POWER5,
-		.cpu_user_features = COMMON_USER_POWER5_PLUS,
-		.mmu_features = MMU_FTRS_POWER5,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power5+",
-		.platform = "power5+",
-	},
-	{ /* POWER6 in P5+ mode; 2.04-compliant processor */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x0f000001,
-		.cpu_name = "POWER5+",
-		.cpu_features = CPU_FTRS_POWER5,
-		.cpu_user_features = COMMON_USER_POWER5_PLUS,
-		.mmu_features = MMU_FTRS_POWER5,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.oprofile_cpu_type = "ppc64/ibm-compat-v1",
-		.platform = "power5+",
-	},
-	{ /* Power6 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x003e0000,
-		.cpu_name = "POWER6 (raw)",
-		.cpu_features = CPU_FTRS_POWER6,
-		.cpu_user_features = COMMON_USER_POWER6 |
-			PPC_FEATURE_POWER6_EXT,
-		.mmu_features = MMU_FTRS_POWER6,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power6",
-		.platform = "power6x",
-	},
-	{ /* 2.05-compliant processor, i.e. Power6 "architected" mode */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x0f000002,
-		.cpu_name = "POWER6 (architected)",
-		.cpu_features = CPU_FTRS_POWER6,
-		.cpu_user_features = COMMON_USER_POWER6,
-		.mmu_features = MMU_FTRS_POWER6,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.oprofile_cpu_type = "ppc64/ibm-compat-v1",
-		.platform = "power6",
-	},
-	{ /* 2.06-compliant processor, i.e. Power7 "architected" mode */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x0f000003,
-		.cpu_name = "POWER7 (architected)",
-		.cpu_features = CPU_FTRS_POWER7,
-		.cpu_user_features = COMMON_USER_POWER7,
-		.cpu_user_features2 = COMMON_USER2_POWER7,
-		.mmu_features = MMU_FTRS_POWER7,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.oprofile_cpu_type = "ppc64/ibm-compat-v1",
-		.cpu_setup = __setup_cpu_power7,
-		.cpu_restore = __restore_cpu_power7,
-		.machine_check_early = __machine_check_early_realmode_p7,
-		.platform = "power7",
-	},
-	{ /* 2.07-compliant processor, i.e. Power8 "architected" mode */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x0f000004,
-		.cpu_name = "POWER8 (architected)",
-		.cpu_features = CPU_FTRS_POWER8,
-		.cpu_user_features = COMMON_USER_POWER8,
-		.cpu_user_features2 = COMMON_USER2_POWER8,
-		.mmu_features = MMU_FTRS_POWER8,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.oprofile_cpu_type = "ppc64/ibm-compat-v1",
-		.cpu_setup = __setup_cpu_power8,
-		.cpu_restore = __restore_cpu_power8,
-		.machine_check_early = __machine_check_early_realmode_p8,
-		.platform = "power8",
-	},
-	{ /* 3.00-compliant processor, i.e. Power9 "architected" mode */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x0f000005,
-		.cpu_name = "POWER9 (architected)",
-		.cpu_features = CPU_FTRS_POWER9,
-		.cpu_user_features = COMMON_USER_POWER9,
-		.cpu_user_features2 = COMMON_USER2_POWER9,
-		.mmu_features = MMU_FTRS_POWER9,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.oprofile_cpu_type = "ppc64/ibm-compat-v1",
-		.cpu_setup = __setup_cpu_power9,
-		.cpu_restore = __restore_cpu_power9,
-		.platform = "power9",
-	},
-	{ /* 3.1-compliant processor, i.e. Power10 "architected" mode */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x0f000006,
-		.cpu_name = "POWER10 (architected)",
-		.cpu_features = CPU_FTRS_POWER10,
-		.cpu_user_features = COMMON_USER_POWER10,
-		.cpu_user_features2 = COMMON_USER2_POWER10,
-		.mmu_features = MMU_FTRS_POWER10,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.oprofile_cpu_type = "ppc64/ibm-compat-v1",
-		.cpu_setup = __setup_cpu_power10,
-		.cpu_restore = __restore_cpu_power10,
-		.platform = "power10",
-	},
-	{ /* Power7 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x003f0000,
-		.cpu_name = "POWER7 (raw)",
-		.cpu_features = CPU_FTRS_POWER7,
-		.cpu_user_features = COMMON_USER_POWER7,
-		.cpu_user_features2 = COMMON_USER2_POWER7,
-		.mmu_features = MMU_FTRS_POWER7,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power7",
-		.cpu_setup = __setup_cpu_power7,
-		.cpu_restore = __restore_cpu_power7,
-		.machine_check_early = __machine_check_early_realmode_p7,
-		.platform = "power7",
-	},
-	{ /* Power7+ */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x004A0000,
-		.cpu_name = "POWER7+ (raw)",
-		.cpu_features = CPU_FTRS_POWER7,
-		.cpu_user_features = COMMON_USER_POWER7,
-		.cpu_user_features2 = COMMON_USER2_POWER7,
-		.mmu_features = MMU_FTRS_POWER7,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power7",
-		.cpu_setup = __setup_cpu_power7,
-		.cpu_restore = __restore_cpu_power7,
-		.machine_check_early = __machine_check_early_realmode_p7,
-		.platform = "power7+",
-	},
-	{ /* Power8E */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x004b0000,
-		.cpu_name = "POWER8E (raw)",
-		.cpu_features = CPU_FTRS_POWER8E,
-		.cpu_user_features = COMMON_USER_POWER8,
-		.cpu_user_features2 = COMMON_USER2_POWER8,
-		.mmu_features = MMU_FTRS_POWER8,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power8",
-		.cpu_setup = __setup_cpu_power8,
-		.cpu_restore = __restore_cpu_power8,
-		.machine_check_early = __machine_check_early_realmode_p8,
-		.platform = "power8",
-	},
-	{ /* Power8NVL */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x004c0000,
-		.cpu_name = "POWER8NVL (raw)",
-		.cpu_features = CPU_FTRS_POWER8,
-		.cpu_user_features = COMMON_USER_POWER8,
-		.cpu_user_features2 = COMMON_USER2_POWER8,
-		.mmu_features = MMU_FTRS_POWER8,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power8",
-		.cpu_setup = __setup_cpu_power8,
-		.cpu_restore = __restore_cpu_power8,
-		.machine_check_early = __machine_check_early_realmode_p8,
-		.platform = "power8",
-	},
-	{ /* Power8 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x004d0000,
-		.cpu_name = "POWER8 (raw)",
-		.cpu_features = CPU_FTRS_POWER8,
-		.cpu_user_features = COMMON_USER_POWER8,
-		.cpu_user_features2 = COMMON_USER2_POWER8,
-		.mmu_features = MMU_FTRS_POWER8,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power8",
-		.cpu_setup = __setup_cpu_power8,
-		.cpu_restore = __restore_cpu_power8,
-		.machine_check_early = __machine_check_early_realmode_p8,
-		.platform = "power8",
-	},
-	{ /* Power9 DD2.0 */
-		.pvr_mask = 0xffffefff,
-		.pvr_value = 0x004e0200,
-		.cpu_name = "POWER9 (raw)",
-		.cpu_features = CPU_FTRS_POWER9_DD2_0,
-		.cpu_user_features = COMMON_USER_POWER9,
-		.cpu_user_features2 = COMMON_USER2_POWER9,
-		.mmu_features = MMU_FTRS_POWER9,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power9",
-		.cpu_setup = __setup_cpu_power9,
-		.cpu_restore = __restore_cpu_power9,
-		.machine_check_early = __machine_check_early_realmode_p9,
-		.platform = "power9",
-	},
-	{ /* Power9 DD 2.1 */
-		.pvr_mask = 0xffffefff,
-		.pvr_value = 0x004e0201,
-		.cpu_name = "POWER9 (raw)",
-		.cpu_features = CPU_FTRS_POWER9_DD2_1,
-		.cpu_user_features = COMMON_USER_POWER9,
-		.cpu_user_features2 = COMMON_USER2_POWER9,
-		.mmu_features = MMU_FTRS_POWER9,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power9",
-		.cpu_setup = __setup_cpu_power9,
-		.cpu_restore = __restore_cpu_power9,
-		.machine_check_early = __machine_check_early_realmode_p9,
-		.platform = "power9",
-	},
-	{ /* Power9 DD2.2 */
-		.pvr_mask = 0xffffefff,
-		.pvr_value = 0x004e0202,
-		.cpu_name = "POWER9 (raw)",
-		.cpu_features = CPU_FTRS_POWER9_DD2_2,
-		.cpu_user_features = COMMON_USER_POWER9,
-		.cpu_user_features2 = COMMON_USER2_POWER9,
-		.mmu_features = MMU_FTRS_POWER9,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power9",
-		.cpu_setup = __setup_cpu_power9,
-		.cpu_restore = __restore_cpu_power9,
-		.machine_check_early = __machine_check_early_realmode_p9,
-		.platform = "power9",
-	},
-	{ /* Power9 DD2.3 or later */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x004e0000,
-		.cpu_name = "POWER9 (raw)",
-		.cpu_features = CPU_FTRS_POWER9_DD2_3,
-		.cpu_user_features = COMMON_USER_POWER9,
-		.cpu_user_features2 = COMMON_USER2_POWER9,
-		.mmu_features = MMU_FTRS_POWER9,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power9",
-		.cpu_setup = __setup_cpu_power9,
-		.cpu_restore = __restore_cpu_power9,
-		.machine_check_early = __machine_check_early_realmode_p9,
-		.platform = "power9",
-	},
-	{ /* Power10 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00800000,
-		.cpu_name = "POWER10 (raw)",
-		.cpu_features = CPU_FTRS_POWER10,
-		.cpu_user_features = COMMON_USER_POWER10,
-		.cpu_user_features2 = COMMON_USER2_POWER10,
-		.mmu_features = MMU_FTRS_POWER10,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/power10",
-		.cpu_setup = __setup_cpu_power10,
-		.cpu_restore = __restore_cpu_power10,
-		.machine_check_early = __machine_check_early_realmode_p10,
-		.platform = "power10",
-	},
-	{ /* Cell Broadband Engine */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00700000,
-		.cpu_name = "Cell Broadband Engine",
-		.cpu_features = CPU_FTRS_CELL,
-		.cpu_user_features = COMMON_USER_PPC64 |
-			PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
-			PPC_FEATURE_SMT,
-		.mmu_features = MMU_FTRS_CELL,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.oprofile_cpu_type = "ppc64/cell-be",
-		.platform = "ppc-cell-be",
-	},
-	{ /* PA Semi PA6T */
-		.pvr_mask = 0x7fff0000,
-		.pvr_value = 0x00900000,
-		.cpu_name = "PA6T",
-		.cpu_features = CPU_FTRS_PA6T,
-		.cpu_user_features = COMMON_USER_PA6T,
-		.mmu_features = MMU_FTRS_PA6T,
-		.icache_bsize = 64,
-		.dcache_bsize = 64,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_PA6T,
-		.cpu_setup = __setup_cpu_pa6t,
-		.cpu_restore = __restore_cpu_pa6t,
-		.oprofile_cpu_type = "ppc64/pa6t",
-		.platform = "pa6t",
-	},
-	{ /* default match */
-		.pvr_mask = 0x00000000,
-		.pvr_value = 0x00000000,
-		.cpu_name = "POWER5 (compatible)",
-		.cpu_features = CPU_FTRS_COMPATIBLE,
-		.cpu_user_features = COMMON_USER_PPC64,
-		.mmu_features = MMU_FTRS_POWER,
-		.icache_bsize = 128,
-		.dcache_bsize = 128,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_IBM,
-		.platform = "power5",
-	}
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
-#ifdef CONFIG_PPC_BOOK3S_604
-	{ /* 604 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00040000,
-		.cpu_name = "604",
-		.cpu_features = CPU_FTRS_604,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 2,
-		.cpu_setup = __setup_cpu_604,
-		.machine_check = machine_check_generic,
-		.platform = "ppc604",
-	},
-	{ /* 604e */
-		.pvr_mask = 0xfffff000,
-		.pvr_value = 0x00090000,
-		.cpu_name = "604e",
-		.cpu_features = CPU_FTRS_604,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.cpu_setup = __setup_cpu_604,
-		.machine_check = machine_check_generic,
-		.platform = "ppc604",
-	},
-	{ /* 604r */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00090000,
-		.cpu_name = "604r",
-		.cpu_features = CPU_FTRS_604,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.cpu_setup = __setup_cpu_604,
-		.machine_check = machine_check_generic,
-		.platform = "ppc604",
-	},
-	{ /* 604ev */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x000a0000,
-		.cpu_name = "604ev",
-		.cpu_features = CPU_FTRS_604,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.cpu_setup = __setup_cpu_604,
-		.machine_check = machine_check_generic,
-		.platform = "ppc604",
-	},
-	{ /* 740/750 (0x4202, don't support TAU ?) */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x00084202,
-		.cpu_name = "740/750",
-		.cpu_features = CPU_FTRS_740_NOTAU,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.cpu_setup = __setup_cpu_750,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-	},
-	{ /* 750CX (80100 and 8010x?) */
-		.pvr_mask = 0xfffffff0,
-		.pvr_value = 0x00080100,
-		.cpu_name = "750CX",
-		.cpu_features = CPU_FTRS_750,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.cpu_setup = __setup_cpu_750cx,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-	},
-	{ /* 750CX (82201 and 82202) */
-		.pvr_mask = 0xfffffff0,
-		.pvr_value = 0x00082200,
-		.cpu_name = "750CX",
-		.cpu_features = CPU_FTRS_750,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750cx,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-	},
-	{ /* 750CXe (82214) */
-		.pvr_mask = 0xfffffff0,
-		.pvr_value = 0x00082210,
-		.cpu_name = "750CXe",
-		.cpu_features = CPU_FTRS_750,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750cx,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-	},
-	{ /* 750CXe "Gekko" (83214) */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x00083214,
-		.cpu_name = "750CXe",
-		.cpu_features = CPU_FTRS_750,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750cx,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-	},
-	{ /* 750CL (and "Broadway") */
-		.pvr_mask = 0xfffff0e0,
-		.pvr_value = 0x00087000,
-		.cpu_name = "750CL",
-		.cpu_features = CPU_FTRS_750CL,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-		.oprofile_cpu_type = "ppc/750",
-	},
-	{ /* 745/755 */
-		.pvr_mask = 0xfffff000,
-		.pvr_value = 0x00083000,
-		.cpu_name = "745/755",
-		.cpu_features = CPU_FTRS_750,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-	},
-	{ /* 750FX rev 1.x */
-		.pvr_mask = 0xffffff00,
-		.pvr_value = 0x70000100,
-		.cpu_name = "750FX",
-		.cpu_features = CPU_FTRS_750FX1,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-		.oprofile_cpu_type = "ppc/750",
-	},
-	{ /* 750FX rev 2.0 must disable HID0[DPM] */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x70000200,
-		.cpu_name = "750FX",
-		.cpu_features = CPU_FTRS_750FX2,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-		.oprofile_cpu_type = "ppc/750",
-	},
-	{ /* 750FX (All revs except 2.0) */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x70000000,
-		.cpu_name = "750FX",
-		.cpu_features = CPU_FTRS_750FX,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750fx,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-		.oprofile_cpu_type = "ppc/750",
-	},
-	{ /* 750GX */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x70020000,
-		.cpu_name = "750GX",
-		.cpu_features = CPU_FTRS_750GX,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750fx,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-		.oprofile_cpu_type = "ppc/750",
-	},
-	{ /* 740/750 (L2CR bit need fixup for 740) */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00080000,
-		.cpu_name = "740/750",
-		.cpu_features = CPU_FTRS_740,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_IBM,
-		.cpu_setup = __setup_cpu_750,
-		.machine_check = machine_check_generic,
-		.platform = "ppc750",
-	},
-	{ /* 7400 rev 1.1 ? (no TAU) */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x000c1101,
-		.cpu_name = "7400 (1.1)",
-		.cpu_features = CPU_FTRS_7400_NOTAU,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_7400,
-		.machine_check = machine_check_generic,
-		.platform = "ppc7400",
-	},
-	{ /* 7400 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x000c0000,
-		.cpu_name = "7400",
-		.cpu_features = CPU_FTRS_7400,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_7400,
-		.machine_check = machine_check_generic,
-		.platform = "ppc7400",
-	},
-	{ /* 7410 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x800c0000,
-		.cpu_name = "7410",
-		.cpu_features = CPU_FTRS_7400,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_7410,
-		.machine_check = machine_check_generic,
-		.platform = "ppc7400",
-	},
-	{ /* 7450 2.0 - no doze/nap */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x80000200,
-		.cpu_name = "7450",
-		.cpu_features = CPU_FTRS_7450_20,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7450 2.1 */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x80000201,
-		.cpu_name = "7450",
-		.cpu_features = CPU_FTRS_7450_21,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7450 2.3 and newer */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80000000,
-		.cpu_name = "7450",
-		.cpu_features = CPU_FTRS_7450_23,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7455 rev 1.x */
-		.pvr_mask = 0xffffff00,
-		.pvr_value = 0x80010100,
-		.cpu_name = "7455",
-		.cpu_features = CPU_FTRS_7455_1,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7455 rev 2.0 */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x80010200,
-		.cpu_name = "7455",
-		.cpu_features = CPU_FTRS_7455_20,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7455 others */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80010000,
-		.cpu_name = "7455",
-		.cpu_features = CPU_FTRS_7455,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7447/7457 Rev 1.0 */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x80020100,
-		.cpu_name = "7447/7457",
-		.cpu_features = CPU_FTRS_7447_10,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7447/7457 Rev 1.1 */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x80020101,
-		.cpu_name = "7447/7457",
-		.cpu_features = CPU_FTRS_7447_10,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7447/7457 Rev 1.2 and later */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80020000,
-		.cpu_name = "7447/7457",
-		.cpu_features = CPU_FTRS_7447,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7447A */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80030000,
-		.cpu_name = "7447A",
-		.cpu_features = CPU_FTRS_7447A,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-	{ /* 7448 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80040000,
-		.cpu_name = "7448",
-		.cpu_features = CPU_FTRS_7448,
-		.cpu_user_features = COMMON_USER |
-			PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
-		.mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 6,
-		.pmc_type = PPC_PMC_G4,
-		.cpu_setup = __setup_cpu_745x,
-		.oprofile_cpu_type = "ppc/7450",
-		.machine_check = machine_check_generic,
-		.platform = "ppc7450",
-	},
-#endif /* CONFIG_PPC_BOOK3S_604 */
-#ifdef CONFIG_PPC_BOOK3S_603
-	{ /* 603 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00030000,
-		.cpu_name = "603",
-		.cpu_features = CPU_FTRS_603,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = 0,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_generic,
-		.platform = "ppc603",
-	},
-	{ /* 603e */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00060000,
-		.cpu_name = "603e",
-		.cpu_features = CPU_FTRS_603,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = 0,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_generic,
-		.platform = "ppc603",
-	},
-	{ /* 603ev */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00070000,
-		.cpu_name = "603ev",
-		.cpu_features = CPU_FTRS_603,
-		.cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
-		.mmu_features = 0,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_generic,
-		.platform = "ppc603",
-	},
-	{ /* 82xx (8240, 8245, 8260 are all 603e cores) */
-		.pvr_mask = 0x7fff0000,
-		.pvr_value = 0x00810000,
-		.cpu_name = "82xx",
-		.cpu_features = CPU_FTRS_82XX,
-		.cpu_user_features = COMMON_USER,
-		.mmu_features = 0,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_generic,
-		.platform = "ppc603",
-	},
-	{ /* All G2_LE (603e core, plus some) have the same pvr */
-		.pvr_mask = 0x7fff0000,
-		.pvr_value = 0x00820000,
-		.cpu_name = "G2_LE",
-		.cpu_features = CPU_FTRS_G2_LE,
-		.cpu_user_features = COMMON_USER,
-		.mmu_features = MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_generic,
-		.platform = "ppc603",
-	},
-#ifdef CONFIG_PPC_83xx
-	{ /* e300c1 (a 603e core, plus some) on 83xx */
-		.pvr_mask = 0x7fff0000,
-		.pvr_value = 0x00830000,
-		.cpu_name = "e300c1",
-		.cpu_features = CPU_FTRS_E300,
-		.cpu_user_features = COMMON_USER,
-		.mmu_features = MMU_FTR_USE_HIGH_BATS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_83xx,
-		.platform = "ppc603",
-	},
-	{ /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
-		.pvr_mask = 0x7fff0000,
-		.pvr_value = 0x00840000,
-		.cpu_name = "e300c2",
-		.cpu_features = CPU_FTRS_E300C2,
-		.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
-		.mmu_features = MMU_FTR_USE_HIGH_BATS |
-			MMU_FTR_NEED_DTLB_SW_LRU,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_83xx,
-		.platform = "ppc603",
-	},
-	{ /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
-		.pvr_mask = 0x7fff0000,
-		.pvr_value = 0x00850000,
-		.cpu_name = "e300c3",
-		.cpu_features = CPU_FTRS_E300,
-		.cpu_user_features = COMMON_USER,
-		.mmu_features = MMU_FTR_USE_HIGH_BATS |
-			MMU_FTR_NEED_DTLB_SW_LRU,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_83xx,
-		.num_pmcs = 4,
-		.oprofile_cpu_type = "ppc/e300",
-		.platform = "ppc603",
-	},
-	{ /* e300c4 (e300c1, plus one IU) */
-		.pvr_mask = 0x7fff0000,
-		.pvr_value = 0x00860000,
-		.cpu_name = "e300c4",
-		.cpu_features = CPU_FTRS_E300,
-		.cpu_user_features = COMMON_USER,
-		.mmu_features = MMU_FTR_USE_HIGH_BATS |
-			MMU_FTR_NEED_DTLB_SW_LRU,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_603,
-		.machine_check = machine_check_83xx,
-		.num_pmcs = 4,
-		.oprofile_cpu_type = "ppc/e300",
-		.platform = "ppc603",
-	},
-#endif
-#endif /* CONFIG_PPC_BOOK3S_603 */
-#ifdef CONFIG_PPC_BOOK3S_604
-	{ /* default match, we assume split I/D cache & TB (non-601)... */
-		.pvr_mask = 0x00000000,
-		.pvr_value = 0x00000000,
-		.cpu_name = "(generic PPC)",
-		.cpu_features = CPU_FTRS_CLASSIC32,
-		.cpu_user_features = COMMON_USER,
-		.mmu_features = MMU_FTR_HPTE_TABLE,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_generic,
-		.platform = "ppc603",
-	},
-#endif /* CONFIG_PPC_BOOK3S_604 */
-#endif /* CONFIG_PPC_BOOK3S_32 */
-#ifdef CONFIG_PPC_8xx
-	{ /* 8xx */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = PVR_8xx,
-		.cpu_name = "8xx",
-		/* CPU_FTR_MAYBE_CAN_DOZE is possible,
-		 * if the 8xx code is there.... */
-		.cpu_features = CPU_FTRS_8XX,
-		.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
-		.mmu_features = MMU_FTR_TYPE_8xx,
-		.icache_bsize = 16,
-		.dcache_bsize = 16,
-		.machine_check = machine_check_8xx,
-		.platform = "ppc823",
-	},
-#endif /* CONFIG_PPC_8xx */
-#ifdef CONFIG_40x
-	{ /* STB 04xxx */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x41810000,
-		.cpu_name = "STB04xxx",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* NP405L */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x41610000,
-		.cpu_name = "NP405L",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* NP4GS3 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x40B10000,
-		.cpu_name = "NP4GS3",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* NP405H */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x41410000,
-		.cpu_name = "NP405H",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405GPr */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x50910000,
-		.cpu_name = "405GPr",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* STBx25xx */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x51510000,
-		.cpu_name = "STBx25xx",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405LP */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x41F10000,
-		.cpu_name = "405LP",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EP */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x51210000,
-		.cpu_name = "405EP",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EX Rev. A/B with Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x12910007,
-		.cpu_name = "405EX Rev. A/B",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EX Rev. C without Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x1291000d,
-		.cpu_name = "405EX Rev. C",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EX Rev. C with Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x1291000f,
-		.cpu_name = "405EX Rev. C",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EX Rev. D without Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x12910003,
-		.cpu_name = "405EX Rev. D",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EX Rev. D with Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x12910005,
-		.cpu_name = "405EX Rev. D",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EXr Rev. A/B without Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x12910001,
-		.cpu_name = "405EXr Rev. A/B",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EXr Rev. C without Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x12910009,
-		.cpu_name = "405EXr Rev. C",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EXr Rev. C with Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x1291000b,
-		.cpu_name = "405EXr Rev. C",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EXr Rev. D without Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x12910000,
-		.cpu_name = "405EXr Rev. D",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* 405EXr Rev. D with Security */
-		.pvr_mask = 0xffff000f,
-		.pvr_value = 0x12910002,
-		.cpu_name = "405EXr Rev. D",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{
-		/* 405EZ */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x41510000,
-		.cpu_name = "405EZ",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* APM8018X */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x7ff11432,
-		.cpu_name = "APM8018X",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	},
-	{ /* default match */
-		.pvr_mask = 0x00000000,
-		.pvr_value = 0x00000000,
-		.cpu_name = "(generic 40x PPC)",
-		.cpu_features = CPU_FTRS_40X,
-		.cpu_user_features = PPC_FEATURE_32 |
-			PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
-		.mmu_features = MMU_FTR_TYPE_40x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc405",
-	}
-
-#endif /* CONFIG_40x */
-#ifdef CONFIG_44x
-#ifndef CONFIG_PPC_47x
-	{
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x40000850,
-		.cpu_name = "440GR Rev. A",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440",
-	},
-	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x40000858,
-		.cpu_name = "440EP Rev. A",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440ep,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440",
-	},
-	{
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x400008d3,
-		.cpu_name = "440GR Rev. B",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440",
-	},
-	{ /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
-		.pvr_mask = 0xf0000ff7,
-		.pvr_value = 0x400008d4,
-		.cpu_name = "440EP Rev. C",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440ep,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440",
-	},
-	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x400008db,
-		.cpu_name = "440EP Rev. B",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440ep,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440",
-	},
-	{ /* 440GRX */
-		.pvr_mask = 0xf0000ffb,
-		.pvr_value = 0x200008D0,
-		.cpu_name = "440GRX",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440grx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
-		.pvr_mask = 0xf0000ffb,
-		.pvr_value = 0x200008D8,
-		.cpu_name = "440EPX",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440epx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 440GP Rev. B */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x40000440,
-		.cpu_name = "440GP Rev. B",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440gp",
-	},
-	{ /* 440GP Rev. C */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x40000481,
-		.cpu_name = "440GP Rev. C",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440gp",
-	},
-	{ /* 440GX Rev. A */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x50000850,
-		.cpu_name = "440GX Rev. A",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440gx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 440GX Rev. B */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x50000851,
-		.cpu_name = "440GX Rev. B",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440gx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 440GX Rev. C */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x50000892,
-		.cpu_name = "440GX Rev. C",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440gx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 440GX Rev. F */
-		.pvr_mask = 0xf0000fff,
-		.pvr_value = 0x50000894,
-		.cpu_name = "440GX Rev. F",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440gx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 440SP Rev. A */
-		.pvr_mask = 0xfff00fff,
-		.pvr_value = 0x53200891,
-		.cpu_name = "440SP Rev. A",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440",
-	},
-	{ /* 440SPe Rev. A */
-		.pvr_mask = 0xfff00fff,
-		.pvr_value = 0x53400890,
-		.cpu_name = "440SPe Rev. A",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440spe,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 440SPe Rev. B */
-		.pvr_mask = 0xfff00fff,
-		.pvr_value = 0x53400891,
-		.cpu_name = "440SPe Rev. B",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_440spe,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 460EX */
-		.pvr_mask = 0xffff0006,
-		.pvr_value = 0x13020002,
-		.cpu_name = "460EX",
-		.cpu_features = CPU_FTRS_440x6,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_460ex,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 460EX Rev B */
-		.pvr_mask = 0xffff0007,
-		.pvr_value = 0x13020004,
-		.cpu_name = "460EX Rev. B",
-		.cpu_features = CPU_FTRS_440x6,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_460ex,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 460GT */
-		.pvr_mask = 0xffff0006,
-		.pvr_value = 0x13020000,
-		.cpu_name = "460GT",
-		.cpu_features = CPU_FTRS_440x6,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_460gt,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 460GT Rev B */
-		.pvr_mask = 0xffff0007,
-		.pvr_value = 0x13020005,
-		.cpu_name = "460GT Rev. B",
-		.cpu_features = CPU_FTRS_440x6,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_460gt,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 460SX */
-		.pvr_mask = 0xffffff00,
-		.pvr_value = 0x13541800,
-		.cpu_name = "460SX",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_460sx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* 464 in APM821xx */
-		.pvr_mask = 0xfffffff0,
-		.pvr_value = 0x12C41C80,
-		.cpu_name = "APM821XX",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.cpu_setup = __setup_cpu_apm821xx,
-		.machine_check = machine_check_440A,
-		.platform = "ppc440",
-	},
-	{ /* default match */
-		.pvr_mask = 0x00000000,
-		.pvr_value = 0x00000000,
-		.cpu_name = "(generic 44x PPC)",
-		.cpu_features = CPU_FTRS_44X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_44x,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_4xx,
-		.platform = "ppc440",
-	}
-#else /* CONFIG_PPC_47x */
-	{ /* 476 DD2 core */
-		.pvr_mask = 0xffffffff,
-		.pvr_value = 0x11a52080,
-		.cpu_name = "476",
-		.cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize = 32,
-		.dcache_bsize = 128,
-		.machine_check = machine_check_47x,
-		.platform = "ppc470",
-	},
-	{ /* 476fpe */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x7ff50000,
-		.cpu_name = "476fpe",
-		.cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize = 32,
-		.dcache_bsize = 128,
-		.machine_check = machine_check_47x,
-		.platform = "ppc470",
-	},
-	{ /* 476 iss */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x00050000,
-		.cpu_name = "476",
-		.cpu_features = CPU_FTRS_47X,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize = 32,
-		.dcache_bsize = 128,
-		.machine_check = machine_check_47x,
-		.platform = "ppc470",
-	},
-	{ /* 476 others */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x11a50000,
-		.cpu_name = "476",
-		.cpu_features = CPU_FTRS_47X,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_FPU,
-		.mmu_features = MMU_FTR_TYPE_47x |
-			MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
-		.icache_bsize = 32,
-		.dcache_bsize = 128,
-		.machine_check = machine_check_47x,
-		.platform = "ppc470",
-	},
-	{ /* default match */
-		.pvr_mask = 0x00000000,
-		.pvr_value = 0x00000000,
-		.cpu_name = "(generic 47x PPC)",
-		.cpu_features = CPU_FTRS_47X,
-		.cpu_user_features = COMMON_USER_BOOKE,
-		.mmu_features = MMU_FTR_TYPE_47x,
-		.icache_bsize = 32,
-		.dcache_bsize = 128,
-		.machine_check = machine_check_47x,
-		.platform = "ppc470",
-	}
-#endif /* CONFIG_PPC_47x */
-#endif /* CONFIG_44x */
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_E500
-#ifdef CONFIG_PPC32
-#ifndef CONFIG_PPC_E500MC
-	{ /* e500 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80200000,
-		.cpu_name = "e500",
-		.cpu_features = CPU_FTRS_E500,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_SPE_COMP |
-			PPC_FEATURE_HAS_EFP_SINGLE_COMP,
-		.cpu_user_features2 = PPC_FEATURE2_ISEL,
-		.mmu_features = MMU_FTR_TYPE_FSL_E,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.oprofile_cpu_type = "ppc/e500",
-		.cpu_setup = __setup_cpu_e500v1,
-		.machine_check = machine_check_e500,
-		.platform = "ppc8540",
-	},
-	{ /* e500v2 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80210000,
-		.cpu_name = "e500v2",
-		.cpu_features = CPU_FTRS_E500_2,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_SPE_COMP |
-			PPC_FEATURE_HAS_EFP_SINGLE_COMP |
-			PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
-		.cpu_user_features2 = PPC_FEATURE2_ISEL,
-		.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.num_pmcs = 4,
-		.oprofile_cpu_type = "ppc/e500",
-		.cpu_setup = __setup_cpu_e500v2,
-		.machine_check = machine_check_e500,
-		.platform = "ppc8548",
-		.cpu_down_flush = cpu_down_flush_e500v2,
-	},
-#else
-	{ /* e500mc */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80230000,
-		.cpu_name = "e500mc",
-		.cpu_features = CPU_FTRS_E500MC,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.cpu_user_features2 = PPC_FEATURE2_ISEL,
-		.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
-			MMU_FTR_USE_TLBILX,
-		.icache_bsize = 64,
-		.dcache_bsize = 64,
-		.num_pmcs = 4,
-		.oprofile_cpu_type = "ppc/e500mc",
-		.cpu_setup = __setup_cpu_e500mc,
-		.machine_check = machine_check_e500mc,
-		.platform = "ppce500mc",
-		.cpu_down_flush = cpu_down_flush_e500mc,
-	},
-#endif /* CONFIG_PPC_E500MC */
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC_E500MC
-	{ /* e5500 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80240000,
-		.cpu_name = "e5500",
-		.cpu_features = CPU_FTRS_E5500,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.cpu_user_features2 = PPC_FEATURE2_ISEL,
-		.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
-			MMU_FTR_USE_TLBILX,
-		.icache_bsize = 64,
-		.dcache_bsize = 64,
-		.num_pmcs = 4,
-		.oprofile_cpu_type = "ppc/e500mc",
-		.cpu_setup = __setup_cpu_e5500,
-#ifndef CONFIG_PPC32
-		.cpu_restore = __restore_cpu_e5500,
-#endif
-		.machine_check = machine_check_e500mc,
-		.platform = "ppce5500",
-		.cpu_down_flush = cpu_down_flush_e5500,
-	},
-	{ /* e6500 */
-		.pvr_mask = 0xffff0000,
-		.pvr_value = 0x80400000,
-		.cpu_name = "e6500",
-		.cpu_features = CPU_FTRS_E6500,
-		.cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
-			PPC_FEATURE_HAS_ALTIVEC_COMP,
-		.cpu_user_features2 = PPC_FEATURE2_ISEL,
-		.mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
-			MMU_FTR_USE_TLBILX,
-		.icache_bsize = 64,
-		.dcache_bsize = 64,
-		.num_pmcs = 6,
-		.oprofile_cpu_type = "ppc/e6500",
-		.cpu_setup = __setup_cpu_e6500,
-#ifndef CONFIG_PPC32
-		.cpu_restore = __restore_cpu_e6500,
-#endif
-		.machine_check = machine_check_e500mc,
-		.platform = "ppce6500",
-		.cpu_down_flush = cpu_down_flush_e6500,
-	},
-#endif /* CONFIG_PPC_E500MC */
-#ifdef CONFIG_PPC32
-	{ /* default match */
-		.pvr_mask = 0x00000000,
-		.pvr_value = 0x00000000,
-		.cpu_name = "(generic E500 PPC)",
-		.cpu_features = CPU_FTRS_E500,
-		.cpu_user_features = COMMON_USER_BOOKE |
-			PPC_FEATURE_HAS_SPE_COMP |
-			PPC_FEATURE_HAS_EFP_SINGLE_COMP,
-		.mmu_features = MMU_FTR_TYPE_FSL_E,
-		.icache_bsize = 32,
-		.dcache_bsize = 32,
-		.machine_check = machine_check_e500,
-		.platform = "powerpc",
-	}
-#endif /* CONFIG_PPC32 */
-#endif /* CONFIG_E500 */
-};
+#include "cpu_specs.h"
 void __init set_cur_cpu_spec(struct cpu_spec *s)
 {
@@ -2033,25 +69,16 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
 		t->pmc_type = old.pmc_type;
 
 		/*
-		 * If we have passed through this logic once before and
-		 * have pulled the default case because the real PVR was
-		 * not found inside cpu_specs[], then we are possibly
-		 * running in compatibility mode. In that case, let the
-		 * oprofiler know which set of compatibility counters to
-		 * pull from by making sure the oprofile_cpu_type string
-		 * is set to that of compatibility mode. If the
-		 * oprofile_cpu_type already has a value, then we are
-		 * possibly overriding a real PVR with a logical one,
-		 * and, in that case, keep the current value for
-		 * oprofile_cpu_type. Furthermore, let's ensure that the
+		 * Let's ensure that the
 		 * fix for the PMAO bug is enabled on compatibility mode.
 		 */
-		if (old.oprofile_cpu_type != NULL) {
-			t->oprofile_cpu_type = old.oprofile_cpu_type;
-			t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
-		}
+		t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
 	}
 
+	/* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */
+	if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32))
+		t->mmu_features |= MMU_FTR_KUAP;
+
 	*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
 
 	/*
@@ -2081,6 +108,8 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
 	struct cpu_spec *s = cpu_specs;
 	int i;
 
+	BUILD_BUG_ON(!ARRAY_SIZE(cpu_specs));
+
 	s = PTRRELOC(s);
 
 	for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
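Note on how the tables removed above are consumed: identify_cpu() keeps the first cpu_specs[] entry whose masked PVR matches, which is why each config block ends with an all-zero "default match" entry that matches anything. A minimal sketch of that selection rule (match_pvr() is a hypothetical free-standing helper written for illustration, not a kernel function):

	/* First-match wins: specific masks must precede the catch-all default. */
	static struct cpu_spec *match_pvr(struct cpu_spec *specs, int n,
					  unsigned int pvr)
	{
		for (int i = 0; i < n; i++)
			if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
				return &specs[i];
		return NULL;	/* unreachable while a default entry exists */
	}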
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9a3b85bfc83f..103b6605dd68 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -13,12 +13,13 @@
 #include <linux/io.h>
 #include <linux/memblock.h>
 #include <linux/of.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
 #include <asm/kdump.h>
 #include <asm/firmware.h>
 #include <linux/uio.h>
 #include <asm/rtas.h>
 #include <asm/inst.h>
+#include <asm/fadump.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
@@ -92,6 +93,17 @@ ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
 	return csize;
 }
 
+/*
+ * Return true only when the kexec-based kernel dump capturing method is
+ * used. This ensures all restrictions applied for the kdump case are not
+ * automatically applied for the fadump case.
+ */
+bool is_kdump_kernel(void)
+{
+	return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX;
+}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
+
 #ifdef CONFIG_PPC_RTAS
 /*
  * The crashkernel region will almost always overlap the RTAS region, so
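Since fadump also populates elfcorehdr_addr, the helper added above is what lets callers distinguish "kexec-based capture kernel" from "any dump-capture kernel". A hypothetical caller, purely to illustrate the split (is_kdump_kernel() and is_fadump_active() come from the diff; the two handler names are made up):

	static void dump_capture_setup(void)
	{
		if (is_kdump_kernel()) {
			/* kexec-based capture: kdump-only restrictions apply */
			restrict_crashkernel_resources();	/* hypothetical */
		} else if (is_fadump_active()) {
			/* firmware-assisted dump: memory preserved by firmware */
			reserve_fadump_regions();		/* hypothetical */
		}
	}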
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 30d4eca88d17..909a05cd2809 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -11,6 +11,7 @@
 #include <linux/debugfs.h>
 #include <asm/machdep.h>
 #include <asm/hvcall.h>
+#include <asm/firmware.h>
 
 bool dawr_force_enable;
 EXPORT_SYMBOL_GPL(dawr_force_enable);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f55c6fb34a3a..5712dd846263 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -27,7 +27,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
 
 	ppc_msgsync();
 
-	if (should_hard_irq_enable())
+	if (should_hard_irq_enable(regs))
 		do_hard_irq_enable();
 
 	kvmppc_clear_host_ipi(smp_processor_id());
diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c
new file mode 100644
index 000000000000..3a0358e91c60
--- /dev/null
+++ b/arch/powerpc/kernel/dexcr.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
+
+#include <asm/cpu_has_feature.h>
+#include <asm/cputable.h>
+#include <asm/processor.h>
+#include <asm/reg.h>
+
+static int __init init_task_dexcr(void)
+{
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_31))
+		return 0;
+
+	current->thread.dexcr_onexec = mfspr(SPRN_DEXCR);
+
+	return 0;
+}
+early_initcall(init_task_dexcr)
+
+/* Allow thread local configuration of these by default */
+#define DEXCR_PRCTL_EDITABLE ( \
+	DEXCR_PR_IBRTPD | \
+	DEXCR_PR_SRAPD | \
+	DEXCR_PR_NPHIE)
+
+static int prctl_to_aspect(unsigned long which, unsigned int *aspect)
+{
+	switch (which) {
+	case PR_PPC_DEXCR_SBHE:
+		*aspect = DEXCR_PR_SBHE;
+		break;
+	case PR_PPC_DEXCR_IBRTPD:
+		*aspect = DEXCR_PR_IBRTPD;
+		break;
+	case PR_PPC_DEXCR_SRAPD:
+		*aspect = DEXCR_PR_SRAPD;
+		break;
+	case PR_PPC_DEXCR_NPHIE:
+		*aspect = DEXCR_PR_NPHIE;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+int get_dexcr_prctl(struct task_struct *task, unsigned long which)
+{
+	unsigned int aspect;
+	int ret;
+
+	ret = prctl_to_aspect(which, &aspect);
+	if (ret)
+		return ret;
+
+	if (aspect & DEXCR_PRCTL_EDITABLE)
+		ret |= PR_PPC_DEXCR_CTRL_EDITABLE;
+
+	if (aspect & mfspr(SPRN_DEXCR))
+		ret |= PR_PPC_DEXCR_CTRL_SET;
+	else
+		ret |= PR_PPC_DEXCR_CTRL_CLEAR;
+
+	if (aspect & task->thread.dexcr_onexec)
+		ret |= PR_PPC_DEXCR_CTRL_SET_ONEXEC;
+	else
+		ret |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC;
+
+	return ret;
+}
+
+int set_dexcr_prctl(struct task_struct *task, unsigned long which, unsigned long ctrl)
+{
+	unsigned long dexcr;
+	unsigned int aspect;
+	int err = 0;
+
+	err = prctl_to_aspect(which, &aspect);
+	if (err)
+		return err;
+
+	if (!(aspect & DEXCR_PRCTL_EDITABLE))
+		return -EPERM;
+
+	if (ctrl & ~PR_PPC_DEXCR_CTRL_MASK)
+		return -EINVAL;
+
+	if (ctrl & PR_PPC_DEXCR_CTRL_SET && ctrl & PR_PPC_DEXCR_CTRL_CLEAR)
+		return -EINVAL;
+
+	if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC && ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC)
+		return -EINVAL;
+
+	/*
+	 * We do not want an unprivileged process being able to disable
+	 * a setuid process's hash check instructions
+	 */
+	if (aspect == DEXCR_PR_NPHIE &&
+	    ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC &&
+	    !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	dexcr = mfspr(SPRN_DEXCR);
+
+	if (ctrl & PR_PPC_DEXCR_CTRL_SET)
+		dexcr |= aspect;
+	else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR)
+		dexcr &= ~aspect;
+
+	if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC)
+		task->thread.dexcr_onexec |= aspect;
+	else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC)
+		task->thread.dexcr_onexec &= ~aspect;
+
+	mtspr(SPRN_DEXCR, dexcr);
+
+	return 0;
+}
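The new dexcr.c above is the kernel side of a prctl() pair; userspace toggles the aspects per thread. A sketch of the expected calling convention, assuming the PR_PPC_GET_DEXCR/PR_PPC_SET_DEXCR request numbers and the PR_PPC_DEXCR_* constants are exported via <linux/prctl.h>:

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	int main(void)
	{
		int ctrl;

		/* Request hashst/hashchk enforcement (NPHIE) for this thread. */
		if (prctl(PR_PPC_SET_DEXCR, PR_PPC_DEXCR_NPHIE,
			  PR_PPC_DEXCR_CTRL_SET, 0, 0))
			perror("PR_PPC_SET_DEXCR");

		/* Read back the aspect's state and whether it is editable. */
		ctrl = prctl(PR_PPC_GET_DEXCR, PR_PPC_DEXCR_NPHIE, 0, 0, 0);
		if (ctrl >= 0)
			printf("editable=%d set=%d\n",
			       !!(ctrl & PR_PPC_DEXCR_CTRL_EDITABLE),
			       !!(ctrl & PR_PPC_DEXCR_CTRL_SET));
		return 0;
	}

Note how set_dexcr_prctl() rejects PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC on NPHIE without CAP_SYS_ADMIN, so an unprivileged process cannot strip hash checking from setuid children.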
*/ .num_pmcs = 0, .pmc_type = PPC_PMC_DEFAULT, - .oprofile_cpu_type = NULL, .cpu_setup = NULL, .cpu_restore = __restore_cpu_cpufeatures, .machine_check_early = NULL, @@ -387,7 +386,6 @@ static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power8"; return 1; } @@ -423,7 +421,6 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power9"; return 1; } @@ -449,7 +446,6 @@ static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) cur_cpu_spec->num_pmcs = 6; cur_cpu_spec->pmc_type = PPC_PMC_IBM; - cur_cpu_spec->oprofile_cpu_type = "ppc64/power10"; return 1; } @@ -462,6 +458,14 @@ static int __init feat_enable_mce_power10(struct dt_cpu_feature *f) return 1; } +static int __init feat_enable_mce_power11(struct dt_cpu_feature *f) +{ + cur_cpu_spec->platform = "power11"; + cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10; + + return 1; +} + static int __init feat_enable_tm(struct dt_cpu_feature *f) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -652,8 +656,10 @@ static struct dt_cpu_feature_match __initdata {"pc-relative-addressing", feat_enable, 0}, {"machine-check-power9", feat_enable_mce_power9, 0}, {"machine-check-power10", feat_enable_mce_power10, 0}, + {"machine-check-power11", feat_enable_mce_power11, 0}, {"performance-monitor-power9", feat_enable_pmu_power9, 0}, {"performance-monitor-power10", feat_enable_pmu_power10, 0}, + {"performance-monitor-power11", feat_enable_pmu_power10, 0}, {"event-based-branch-v3", feat_enable, 0}, {"random-number-generator", feat_enable, 0}, {"system-call-vectored", feat_disable, 0}, @@ -861,7 +867,7 @@ bool __init dt_cpu_ftrs_init(void *fdt) using_dt_cpu_ftrs = false; /* Setup and verify the FDT, if it fails we just bail */ - if (!early_init_dt_verify(fdt)) + if (!early_init_dt_verify(fdt, __pa(fdt))) return false; if (!of_scan_flat_dt(fdt_find_cpu_features, NULL)) @@ -1081,12 +1087,10 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char /* Count and allocate space for cpu features */ of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes, &nr_dt_cpu_features); - dt_cpu_features = memblock_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE); - if (!dt_cpu_features) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, - sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, - PAGE_SIZE); + dt_cpu_features = + memblock_alloc_or_panic( + sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, + PAGE_SIZE); cpufeatures_setup_start(isa); @@ -1103,7 +1107,7 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char prop = of_get_flat_dt_prop(node, "display-name", NULL); if (prop && strlen((char *)prop) != 0) { - strlcpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name)); + strscpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name)); cur_cpu_spec->cpu_name = dt_cpu_name; } diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ab316e155ea9..bb836f02101c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -506,9 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * We will punt with the following conditions: Failure to get * PE's state, EEH not support and Permanently unavailable * state, PE is in good state. 
+ * + * On the pSeries, after reaching the threshold, get_state might + * return EEH_STATE_NOT_SUPPORT. However, it's possible that the + * device state remains uncleared if the device is not marked + * pci_channel_io_perm_failure. Therefore, consider logging the + * event to let device removal happen. + * */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { + (ret == EEH_STATE_NOT_SUPPORT && + dev->error_state == pci_channel_io_perm_failure) || + eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; @@ -1130,6 +1139,7 @@ int eeh_unfreeze_pe(struct eeh_pe *pe) return ret; } +EXPORT_SYMBOL_GPL(eeh_unfreeze_pe); static struct pci_device_id eeh_reset_ids[] = { @@ -1199,16 +1209,16 @@ int eeh_dev_open(struct pci_dev *pdev) struct eeh_dev *edev; int ret = -ENODEV; - mutex_lock(&eeh_dev_mutex); + guard(mutex)(&eeh_dev_mutex); /* No PCI device ? */ if (!pdev) - goto out; + return ret; /* No EEH device or PE ? */ edev = pci_dev_to_eeh_dev(pdev); if (!edev || !edev->pe) - goto out; + return ret; /* * The PE might have been put into frozen state, but we @@ -1218,16 +1228,12 @@ int eeh_dev_open(struct pci_dev *pdev) */ ret = eeh_pe_change_owner(edev->pe); if (ret) - goto out; + return ret; /* Increase PE's pass through count */ atomic_inc(&edev->pe->pass_dev_cnt); - mutex_unlock(&eeh_dev_mutex); return 0; -out: - mutex_unlock(&eeh_dev_mutex); - return ret; } EXPORT_SYMBOL_GPL(eeh_dev_open); @@ -1243,43 +1249,25 @@ void eeh_dev_release(struct pci_dev *pdev) { struct eeh_dev *edev; - mutex_lock(&eeh_dev_mutex); + guard(mutex)(&eeh_dev_mutex); /* No PCI device ? */ if (!pdev) - goto out; + return; /* No EEH device ? */ edev = pci_dev_to_eeh_dev(pdev); if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) - goto out; + return; /* Decrease PE's pass through count */ WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); eeh_pe_change_owner(edev->pe); -out: - mutex_unlock(&eeh_dev_mutex); } EXPORT_SYMBOL(eeh_dev_release); #ifdef CONFIG_IOMMU_API -static int dev_has_iommu_table(struct device *dev, void *data) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_dev **ppdev = data; - - if (!dev) - return 0; - - if (device_iommu_mapped(dev)) { - *ppdev = pdev; - return 1; - } - - return 0; -} - /** * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE * @group: IOMMU group @@ -1516,6 +1504,8 @@ int eeh_pe_configure(struct eeh_pe *pe) /* Invalid PE ? */ if (!pe) return -ENODEV; + else + ret = eeh_ops->configure_bridge(pe); return ret; } @@ -1544,10 +1534,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, if (!eeh_ops || !eeh_ops->err_inject) return -ENOENT; - /* Check on PCI error type */ - if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) - return -EINVAL; - /* Check on PCI error function */ if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) return -EINVAL; @@ -1585,6 +1571,104 @@ static int proc_eeh_show(struct seq_file *m, void *v) } #endif /* CONFIG_PROC_FS */ +static int eeh_break_device(struct pci_dev *pdev) +{ + struct resource *bar = NULL; + void __iomem *mapped; + u16 old, bit; + int i, pos; + + /* Do we have an MMIO BAR to disable? 
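The eeh_dev_open()/eeh_dev_release() conversions above lean on scope-based cleanup from <linux/cleanup.h>: guard(mutex)(&m) acquires the mutex and releases it automatically when the scope exits, which is what lets each "goto out; mutex_unlock()" pair collapse into a plain return. The pattern in isolation (illustrative, not from the patch):

	#include <linux/cleanup.h>
	#include <linux/errno.h>
	#include <linux/mutex.h>

	static DEFINE_MUTEX(demo_lock);
	static int demo_refs;

	static int demo_get(bool allowed)
	{
		guard(mutex)(&demo_lock);	/* dropped on *every* return path */

		if (!allowed)
			return -EPERM;		/* no explicit unlock needed */

		demo_refs++;
		return 0;
	}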
*/ + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + struct resource *r = &pdev->resource[i]; + + if (!r->flags || !r->start) + continue; + if (r->flags & IORESOURCE_IO) + continue; + if (r->flags & IORESOURCE_UNSET) + continue; + + bar = r; + break; + } + + if (!bar) { + pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); + return -ENXIO; + } + + pci_err(pdev, "Going to break: %pR\n", bar); + + if (pdev->is_virtfn) { +#ifndef CONFIG_PCI_IOV + return -ENXIO; +#else + /* + * VFs don't have a per-function COMMAND register, so the best + * we can do is clear the Memory Space Enable bit in the PF's + * SRIOV control reg. + * + * Unfortunately, this requires that we have a PF (i.e. doesn't + * work for a passed-through VF) and it has the potential side + * effect of also causing an EEH on every other VF under the + * PF. Oh well. + */ + pdev = pdev->physfn; + if (!pdev) + return -ENXIO; /* passed through VFs have no PF */ + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + pos += PCI_SRIOV_CTRL; + bit = PCI_SRIOV_CTRL_MSE; +#endif /* !CONFIG_PCI_IOV */ + } else { + bit = PCI_COMMAND_MEMORY; + pos = PCI_COMMAND; + } + + /* + * The process here is: + * + * 1. Disable Memory space. + * + * 2. Perform an MMIO to the device. This should result in an error + * (CA / UR) being raised by the device, which results in an EEH + * PE freeze. Using the in_8() accessor skips the EEH detection hook, + * so the EEH detection machinery won't be triggered here. This is + * to match the usual behaviour of EEH, where the HW will + * asynchronously freeze a PE and it's up to the kernel to notice + * and deal with it. + * + * 3. Turn Memory space back on. This is more important for VFs + * since recovery will probably fail if we don't. For normal devices + * the COMMAND register is reset as a part of re-initialising + * the device. + * + * Breaking stuff is the point so who cares if it's racy ;) + */ + pci_read_config_word(pdev, pos, &old); + + mapped = ioremap(bar->start, PAGE_SIZE); + if (!mapped) { + pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); + return -ENXIO; + } + + pci_write_config_word(pdev, pos, old & ~bit); + in_8(mapped); + pci_write_config_word(pdev, pos, old); + + iounmap(mapped); + + return 0; +} + +int eeh_pe_inject_mmio_error(struct pci_dev *pdev) +{ + return eeh_break_device(pdev); +} + #ifdef CONFIG_DEBUG_FS @@ -1689,7 +1773,6 @@ static ssize_t eeh_force_recover_write(struct file *filp, static const struct file_operations eeh_force_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_force_recover_write, }; @@ -1733,104 +1816,10 @@ static ssize_t eeh_dev_check_write(struct file *filp, static const struct file_operations eeh_dev_check_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_check_write, .read = eeh_debugfs_dev_usage, }; -static int eeh_debugfs_break_device(struct pci_dev *pdev) -{ - struct resource *bar = NULL; - void __iomem *mapped; - u16 old, bit; - int i, pos; - - /* Do we have an MMIO BAR to disable? 
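Stripped of the BAR selection and the VF detour, the injection helper this file gains (and the debugfs copy below loses) reduces to a config-space round trip around one doomed load. Condensed restatement for review, in the eeh.c context (error handling elided):

	/*
	 * Clear MSE, do one MMIO read, restore MSE: the read raises CA/UR and
	 * firmware freezes the PE asynchronously, as in a real EEH event.
	 */
	static void inject_pe_freeze(struct pci_dev *pdev, void __iomem *bar)
	{
		u16 cmd;

		pci_read_config_word(pdev, PCI_COMMAND, &cmd);
		pci_write_config_word(pdev, PCI_COMMAND, cmd & ~PCI_COMMAND_MEMORY);
		in_8(bar);	/* bypasses the EEH detection hook on purpose */
		pci_write_config_word(pdev, PCI_COMMAND, cmd);
	}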
*/ - for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { - struct resource *r = &pdev->resource[i]; - - if (!r->flags || !r->start) - continue; - if (r->flags & IORESOURCE_IO) - continue; - if (r->flags & IORESOURCE_UNSET) - continue; - - bar = r; - break; - } - - if (!bar) { - pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); - return -ENXIO; - } - - pci_err(pdev, "Going to break: %pR\n", bar); - - if (pdev->is_virtfn) { -#ifndef CONFIG_PCI_IOV - return -ENXIO; -#else - /* - * VFs don't have a per-function COMMAND register, so the best - * we can do is clear the Memory Space Enable bit in the PF's - * SRIOV control reg. - * - * Unfortunately, this requires that we have a PF (i.e doesn't - * work for a passed-through VF) and it has the potential side - * effect of also causing an EEH on every other VF under the - * PF. Oh well. - */ - pdev = pdev->physfn; - if (!pdev) - return -ENXIO; /* passed through VFs have no PF */ - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - pos += PCI_SRIOV_CTRL; - bit = PCI_SRIOV_CTRL_MSE; -#endif /* !CONFIG_PCI_IOV */ - } else { - bit = PCI_COMMAND_MEMORY; - pos = PCI_COMMAND; - } - - /* - * Process here is: - * - * 1. Disable Memory space. - * - * 2. Perform an MMIO to the device. This should result in an error - * (CA / UR) being raised by the device which results in an EEH - * PE freeze. Using the in_8() accessor skips the eeh detection hook - * so the freeze hook so the EEH Detection machinery won't be - * triggered here. This is to match the usual behaviour of EEH - * where the HW will asynchronously freeze a PE and it's up to - * the kernel to notice and deal with it. - * - * 3. Turn Memory space back on. This is more important for VFs - * since recovery will probably fail if we don't. For normal - * the COMMAND register is reset as a part of re-initialising - * the device. 
- * - * Breaking stuff is the point so who cares if it's racy ;) - */ - pci_read_config_word(pdev, pos, &old); - - mapped = ioremap(bar->start, PAGE_SIZE); - if (!mapped) { - pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); - return -ENXIO; - } - - pci_write_config_word(pdev, pos, old & ~bit); - in_8(mapped); - pci_write_config_word(pdev, pos, old); - - iounmap(mapped); - - return 0; -} - static ssize_t eeh_dev_break_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) @@ -1842,7 +1831,7 @@ static ssize_t eeh_dev_break_write(struct file *filp, if (IS_ERR(pdev)) return PTR_ERR(pdev); - ret = eeh_debugfs_break_device(pdev); + ret = eeh_break_device(pdev); pci_dev_put(pdev); if (ret < 0) @@ -1853,7 +1842,6 @@ static ssize_t eeh_dev_break_write(struct file *filp, static const struct file_operations eeh_dev_break_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_break_write, .read = eeh_debugfs_dev_usage, }; @@ -1900,7 +1888,6 @@ static ssize_t eeh_dev_can_recover(struct file *filp, static const struct file_operations eeh_dev_can_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_can_recover, .read = eeh_debugfs_dev_usage, }; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 260273e56431..48ad0116f359 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -39,7 +39,7 @@ static int eeh_result_priority(enum pci_ers_result result) case PCI_ERS_RESULT_NEED_RESET: return 6; default: - WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result); + WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", result); return 0; } }; @@ -60,7 +60,7 @@ static const char *pci_ers_result_name(enum pci_ers_result result) case PCI_ERS_RESULT_NO_AER_DRIVER: return "no AER driver"; default: - WARN_ONCE(1, "Unknown result type: %d\n", (int)result); + WARN_ONCE(1, "Unknown result type: %d\n", result); return "unknown"; } }; @@ -257,13 +257,12 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn, struct pci_driver *driver; enum pci_ers_result new_result; - pci_lock_rescan_remove(); pdev = edev->pdev; if (pdev) get_device(&pdev->dev); - pci_unlock_rescan_remove(); if (!pdev) { eeh_edev_info(edev, "no device"); + *result = PCI_ERS_RESULT_DISCONNECT; return; } device_lock(&pdev->dev); @@ -304,8 +303,9 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root, struct eeh_dev *edev, *tmp; pr_info("EEH: Beginning: '%s'\n", name); - eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp) - eeh_pe_report_edev(edev, fn, result); + eeh_for_each_pe(root, pe) + eeh_pe_for_each_dev(pe, edev, tmp) + eeh_pe_report_edev(edev, fn, result); if (result) pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n", name, pci_ers_result_name(*result)); @@ -383,6 +383,8 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (!edev) return; + pci_lock_rescan_remove(); + /* * The content in the config space isn't saved because * the blocked config space on some adapters. 
We have @@ -393,14 +395,19 @@ static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata) if (list_is_last(&edev->entry, &edev->pe->edevs)) eeh_pe_restore_bars(edev->pe); + pci_unlock_rescan_remove(); return; } pdev = eeh_dev_to_pci_dev(edev); - if (!pdev) + if (!pdev) { + pci_unlock_rescan_remove(); return; + } pci_restore_state(pdev); + + pci_unlock_rescan_remove(); } /** @@ -647,9 +654,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) { eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } else { - pci_lock_rescan_remove(); pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); } /* @@ -665,8 +670,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, if (rc) return rc; - pci_lock_rescan_remove(); - /* Restore PE */ eeh_ops->configure_bridge(pe); eeh_pe_restore_bars(pe); @@ -674,7 +677,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, /* Clear frozen state */ rc = eeh_clear_pe_frozen_state(pe, false); if (rc) { - pci_unlock_rescan_remove(); return rc; } @@ -709,7 +711,6 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pe->tstamp = tstamp; pe->freeze_count = cnt; - pci_unlock_rescan_remove(); return 0; } @@ -750,7 +751,7 @@ static void eeh_pe_cleanup(struct eeh_pe *pe) * @pdev: pci_dev to check * * This function may return a false positive if we can't determine the slot's - * presence state. This might happen for for PCIe slots if the PE containing + * presence state. This might happen for PCIe slots if the PE containing * the upstream bridge is also frozen, or the bridge is part of the same PE * as the device. * @@ -843,10 +844,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0}; int devices = 0; + pci_lock_rescan_remove(); + bus = eeh_pe_bus_get(pe); if (!bus) { pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n", __func__, pe->phb->global_number, pe->addr); + pci_unlock_rescan_remove(); return; } @@ -865,9 +869,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe) devices++; if (!devices) { - pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n", + pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n", pe->phb->global_number, pe->addr); - goto out; /* nothing to recover */ + /* + * The device has been removed, so tear down its state. On powernv + * the hotplug driver would take care of this, but not on pseries; + * permanently disable the card, as it has been hot removed. + * + * On powernv, note that the removal of the device is covered by + * the PCI rescan lock, so there is no problem even if the hotplug + * driver attempts to remove the device. + */ + goto recover_failed; } /* Log the event */ @@ -898,7 +911,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe) /* FIXME: Use the same format as dump_stack() */ pr_err("EEH: Call Trace:\n"); for (i = 0; i < pe->trace_entries; i++) - pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]); + pr_err("EEH: [%p] %pS\n", ptrs[i], ptrs[i]); pe->trace_entries = 0; } @@ -1065,10 +1078,10 @@ recover_failed: eeh_slot_error_detail(pe, EEH_LOG_PERM); /* Notify all devices that they're about to go down. 
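The scattered pci_lock_rescan_remove() deletions above are one logical change: instead of taking the rescan/remove lock around individual steps (device removal, bridge reconfiguration, state restore), eeh_handle_normal_event() now holds it for the whole recovery pass. Schematically (sketch only; the details are in the hunks above and below):

	void eeh_recovery_outline(struct eeh_pe *pe)
	{
		pci_lock_rescan_remove();	/* one hold for the whole pass */

		/*
		 * Find the bus, report, reset, restore BARs, hot-remove on
		 * permanent failure: all of it runs under the rescan/remove
		 * lock, so hotplug cannot tear devices down underneath the
		 * recovery walk.
		 */

		pci_unlock_rescan_remove();
	}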
*/ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_set_irq_state(pe, false); eeh_pe_report("error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); /* Mark the PE to be removed permanently */ eeh_pe_state_mark(pe, EEH_PE_REMOVED); @@ -1085,10 +1098,15 @@ recover_failed: eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); + bus = eeh_pe_bus_get(pe); + if (bus) + pci_hp_remove_devices(bus); + else + pr_err("%s: PCI bus for PHB#%x-PE#%x disappeared\n", + __func__, pe->phb->global_number, pe->addr); + /* The passed PE should no longer be used */ + pci_unlock_rescan_remove(); return; } @@ -1105,6 +1123,8 @@ out: eeh_clear_slot_attention(edev->pdev); eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true); + + pci_unlock_rescan_remove(); } /** @@ -1123,6 +1143,7 @@ void eeh_handle_special_event(void) unsigned long flags; int rc; + pci_lock_rescan_remove(); do { rc = eeh_ops->next_error(&pe); @@ -1162,10 +1183,12 @@ void eeh_handle_special_event(void) break; case EEH_NEXT_ERR_NONE: + pci_unlock_rescan_remove(); return; default: pr_warn("%s: Invalid value %d from next_error()\n", __func__, rc); + pci_unlock_rescan_remove(); return; } @@ -1177,7 +1200,9 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_FROZEN_PE || rc == EEH_NEXT_ERR_FENCED_PHB) { eeh_pe_state_mark(pe, EEH_PE_RECOVERING); + pci_unlock_rescan_remove(); eeh_handle_normal_event(pe); + pci_lock_rescan_remove(); } else { eeh_for_each_pe(pe, tmp_pe) eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev) @@ -1185,12 +1210,11 @@ void eeh_handle_special_event(void) /* Notify all devices to be down */ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_set_channel_state(pe, pci_channel_io_perm_failure); eeh_pe_report( "error_detected(permanent failure)", pe, eeh_report_failure, NULL); + eeh_set_channel_state(pe, pci_channel_io_perm_failure); - pci_lock_rescan_remove(); list_for_each_entry(hose, &hose_list, list_node) { phb_pe = eeh_phb_pe_get(hose); if (!phb_pe || @@ -1209,7 +1233,6 @@ void eeh_handle_special_event(void) } pci_hp_remove_devices(bus); } - pci_unlock_rescan_remove(); } /* @@ -1219,4 +1242,6 @@ void eeh_handle_special_event(void) if (rc == EEH_NEXT_ERR_DEAD_IOC) break; } while (rc != EEH_NEXT_ERR_NONE); + + pci_unlock_rescan_remove(); } diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index d2873d17d2b1..e740101fadf3 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -24,10 +24,10 @@ static int eeh_pe_aux_size = 0; static LIST_HEAD(eeh_phb_pe); /** - * eeh_set_pe_aux_size - Set PE auxillary data size - * @size: PE auxillary data size + * eeh_set_pe_aux_size - Set PE auxiliary data size + * @size: PE auxiliary data size in bytes * - * Set PE auxillary data size + * Set PE auxiliary data size. */ void eeh_set_pe_aux_size(int size) { @@ -527,7 +527,7 @@ EXPORT_SYMBOL_GPL(eeh_pe_state_mark); * eeh_pe_mark_isolated * @pe: EEH PE * - * Record that a PE has been isolated by marking the PE and it's children as + * Record that a PE has been isolated by marking the PE and its children as * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices * as pci_channel_io_frozen. 
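Both permanent-failure paths get the same reordering: the channel state is latched to pci_channel_io_perm_failure only after the error_detected() callbacks have run, so drivers act on the state argument passed to them rather than on pdev->error_state. The typical consumer shape (illustrative):

	#include <linux/pci.h>

	static pci_ers_result_t demo_error_detected(struct pci_dev *pdev,
						    pci_channel_state_t state)
	{
		if (state == pci_channel_io_perm_failure)
			return PCI_ERS_RESULT_DISCONNECT;	/* unrecoverable */

		/* quiesce; MMIO may be blocked while the PE is frozen */
		return PCI_ERS_RESULT_NEED_RESET;
	}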
*/ @@ -671,11 +671,12 @@ static void eeh_bridge_check_link(struct eeh_dev *edev) eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val); /* Check link */ - eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val); - if (!(val & PCI_EXP_LNKCAP_DLLLARC)) { - eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val); - msleep(1000); - return; + if (edev->pdev) { + if (!edev->pdev->link_active_reporting) { + eeh_edev_dbg(edev, "No link reporting capability\n"); + msleep(1000); + return; + } } /* Wait the link is up until timeout (5s) */ @@ -850,6 +851,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { struct eeh_dev *edev; struct pci_dev *pdev; + struct pci_bus *bus = NULL; if (pe->type & EEH_PE_PHB) return pe->phb->bus; @@ -860,9 +862,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) - return pdev->bus; + bus = pdev->bus; + pci_unlock_rescan_remove(); - return NULL; + return bus; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1d599df6f169..f4a8c9877249 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -18,6 +18,8 @@ #include <linux/err.h> #include <linux/sys.h> #include <linux/threads.h> +#include <linux/linkage.h> + #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -27,7 +29,6 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> #include <asm/kup.h> @@ -49,7 +50,7 @@ */ .align 12 -#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) +#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_E500) .globl prepare_transfer_to_handler prepare_transfer_to_handler: /* if from kernel, check interrupted DOZE/NAP mode */ @@ -68,23 +69,24 @@ prepare_transfer_to_handler: lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ - lwz r2, GPR2(r11) + REST_GPR(2, r11) b fast_exception_return _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) -#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ +#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */ #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) - .globl __kuep_lock -__kuep_lock: +SYM_FUNC_START(__kuep_lock) lwz r9, THREAD+THSR0(r2) update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_lock) -__kuep_unlock: +SYM_FUNC_START_LOCAL(__kuep_unlock) lwz r9, THREAD+THSR0(r2) rlwinm r9,r9,0,~SR_NX update_user_segments_by_4 r9, r10, r11, r12 blr +SYM_FUNC_END(__kuep_unlock) .macro kuep_lock bl __kuep_lock @@ -101,11 +103,12 @@ __kuep_unlock: .globl transfer_to_syscall transfer_to_syscall: + stw r3, ORIG_GPR3(r1) stw r11, GPR1(r1) stw r11, 0(r1) mflr r12 stw r12, _LINK(r1) -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
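The new "stw r3, ORIG_GPR3(r1)" in transfer_to_syscall above preserves the first syscall argument before the handler can clobber r3 with a return value; the signal code needs that copy to replay an interrupted syscall. The consuming side looks like this (cf. the -ERESTART* handling around check_syscall_restart() in the powerpc signal code; pt_regs from asm/ptrace.h):

	/* Restart a syscall: re-arm arg0 and back the PC up over "sc". */
	static void restart_syscall_regs(struct pt_regs *regs)
	{
		regs->gpr[3] = regs->orig_gpr3;	/* saved by transfer_to_syscall */
		regs->nip -= 4;			/* re-execute the sc instruction */
	}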
*/ #endif lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -113,7 +116,7 @@ transfer_to_syscall: addi r12,r12,STACK_FRAME_REGS_MARKER@l stw r9,_MSR(r1) li r2, INTERRUPT_SYSCALL - stw r12,8(r1) + stw r12,STACK_INT_FRAME_MARKER(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) SAVE_GPRS(3, 8, r1) @@ -121,21 +124,22 @@ transfer_to_syscall: SAVE_NVGPRS(r1) kuep_lock - /* Calling convention has r9 = orig r0, r10 = regs */ - addi r10,r1,STACK_FRAME_OVERHEAD - mr r9,r0 + /* Calling convention has r3 = regs, r4 = orig r0 */ + addi r3,r1,STACK_INT_FRAME_REGS + mr r4,r0 bl system_call_exception ret_from_syscall: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 bl syscall_exit_prepare #ifdef CONFIG_PPC_47x lis r4,icache_44x_need_flush@ha lwz r5,icache_44x_need_flush@l(r4) cmplwi cr0,r5,0 - bne- 2f + bne- .L44x_icache_flush #endif /* CONFIG_PPC_47x */ +.L44x_icache_flush_return: kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) @@ -143,7 +147,7 @@ ret_from_syscall: lwz r7,_NIP(r1) lwz r8,_MSR(r1) cmpwi r3,0 - lwz r3,GPR3(r1) + REST_GPR(3, r1) syscall_exit_finish: mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 @@ -151,12 +155,9 @@ syscall_exit_finish: bne 3f mtcr r5 -1: lwz r2,GPR2(r1) - lwz r1,GPR1(r1) +1: REST_GPR(2, r1) + REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif 3: mtcr r5 lwz r4,_CTR(r1) @@ -164,29 +165,27 @@ syscall_exit_finish: REST_NVGPRS(r1) mtctr r4 mtxer r5 - lwz r0,GPR0(r1) - lwz r3,GPR3(r1) - REST_GPRS(4, 11, r1) - lwz r12,GPR12(r1) + REST_GPR(0, r1) + REST_GPRS(3, 12, r1) b 1b #ifdef CONFIG_44x -2: li r7,0 +.L44x_icache_flush: + li r7,0 iccci r0,r0 stw r7,icache_44x_need_flush@l(r4) - b 1b + b .L44x_icache_flush_return #endif /* CONFIG_44x */ .globl ret_from_fork ret_from_fork: REST_NVGPRS(r1) bl schedule_tail - li r3,0 + li r3,0 /* fork() return value */ b ret_from_syscall - .globl ret_from_kernel_thread -ret_from_kernel_thread: - REST_NVGPRS(r1) + .globl ret_from_kernel_user_thread +ret_from_kernel_user_thread: bl schedule_tail mtctr r14 mr r3,r15 @@ -195,77 +194,34 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall -/* - * This routine switches between two different tasks. The process - * state of one is saved on its kernel stack. Then the state - * of the other is restored from its kernel stack. The memory - * management hardware is updated to the second process's state. - * Finally, we can return to the second process. - * On entry, r3 points to the THREAD for the current task, r4 - * points to the THREAD for the new task. - * - * This routine is always called with interrupts disabled. - * - * Note: there are two ways to get to the "going out" portion - * of this code; either by coming in via the entry (_switch) - * or via "fork" which must set up an environment equivalent - * to the "_switch" path. If you change this , you'll have to - * change the fork code also. - * - * The code which creates the new task context is in 'copy_thread' - * in arch/ppc/kernel/process.c - */ -_GLOBAL(_switch) - stwu r1,-INT_FRAME_SIZE(r1) - mflr r0 - stw r0,INT_FRAME_SIZE+4(r1) - /* r3-r12 are caller saved -- Cort */ - SAVE_NVGPRS(r1) - stw r0,_NIP(r1) /* Return to switch caller */ - mfcr r10 - stw r10,_CCR(r1) - stw r1,KSP(r3) /* Set old stack pointer */ - -#ifdef CONFIG_SMP - /* We need a sync somewhere here to make sure that if the - * previous task gets rescheduled on another CPU, it sees all - * stores it has performed on this one. 
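Easy to miss in the churn above: the asm-to-C handoff switches from ad-hoc registers to the plain ELF calling convention, where the first two integer arguments travel in r3 and r4. "addi r3,r1,STACK_INT_FRAME_REGS; mr r4,r0" therefore pairs with a C prototype of this shape (reproduced from the matching syscall/interrupt C change; verify the exact signature in-tree):

	/* The callee the new convention feeds: regs in r3, original r0 in r4. */
	long system_call_exception(struct pt_regs *regs, unsigned long r0);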
+ .globl start_kernel_thread +start_kernel_thread: + bl schedule_tail + mtctr r14 + mr r3,r15 + PPC440EP_ERR42 + bctrl + /* + * This must not return. We actually want to BUG here, not WARN, + * because BUG will exit the process which is what the kernel thread + * should have done, which may give some hope of continuing. */ - sync -#endif /* CONFIG_SMP */ - - tophys(r0,r4) - mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ - lwz r1,KSP(r4) /* Load new stack pointer */ - - /* save the old current 'last' for return value */ - mr r3,r2 - addi r2,r4,-THREAD /* Update current */ - - lwz r0,_CCR(r1) - mtcrf 0xFF,r0 - /* r3-r12 are destroyed -- Cort */ - REST_NVGPRS(r1) - - lwz r4,_NIP(r1) /* Return to _switch caller in new task */ - mtlr r4 - addi r1,r1,INT_FRAME_SIZE - blr +100: trap + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 .globl fast_exception_return fast_exception_return: -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +#ifndef CONFIG_BOOKE andi. r10,r9,MSR_RI /* check for recoverable interrupt */ beq 3f /* if not, we've got problems */ #endif -2: REST_GPRS(3, 6, r11) - lwz r10,_CCR(r11) - REST_GPRS(1, 2, r11) +2: lwz r10,_CCR(r11) + REST_GPRS(1, 6, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 - /* Clear the exception_marker on the stack to avoid confusing stacktrace */ + /* Clear the exception marker on the stack to avoid confusing stacktrace */ li r10, 0 stw r10, 8(r11) REST_GPR(10, r11) @@ -276,11 +232,8 @@ fast_exception_return: mtspr SPRN_SRR0,r12 REST_GPR(9, r11) REST_GPR(12, r11) - lwz r11,GPR11(r11) + REST_GPR(11, r11) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) /* aargh, a nonrecoverable interrupt, panic */ @@ -295,7 +248,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return) .globl interrupt_return interrupt_return: lwz r4,_MSR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andi. r0,r4,MSR_PR beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare @@ -322,7 +275,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) li r0,0 /* - * Leaving a stale exception_marker on the stack can confuse + * Leaving a stale exception marker on the stack can confuse * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) @@ -337,9 +290,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(0, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif .Lrestore_nvgprs: REST_NVGPRS(r1) @@ -374,7 +324,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mtspr SPRN_XER,r5 /* - * Leaving a stale exception_marker on the stack can confuse + * Leaving a stale exception marker on the stack can confuse * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) @@ -387,9 +337,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(0, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif 1: /* * Emulate stack store with update. New r1 value was already calculated @@ -416,12 +363,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mfspr r9, SPRN_SPRG_SCRATCH0 #endif rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif _ASM_NOKPROBE_SYMBOL(interrupt_return) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_BOOKE /* * Returning from a critical interrupt in user mode doesn't need @@ -436,26 +380,14 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) * time of the critical interrupt. 
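start_kernel_thread's trailing trap encodes a contract rather than a code path: the function it calls is expected to end the task instead of returning. In practice that callee is the kthread bootstrap, whose shape is roughly this (loose paraphrase of kernel/kthread.c, not the literal code):

	#include <linux/sched/task.h>

	static int kernel_thread_body(int (*threadfn)(void *), void *data)
	{
		int ret = threadfn(data);	/* payload may return a status... */

		do_exit(ret);			/* ...but this frame never returns */
	}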
* */ -#ifdef CONFIG_40x -#define PPC_40x_TURN_OFF_MSR_DR \ - /* avoid any possible TLB misses here by turning off MSR.DR, we \ - * assume the instructions here are mapped by a pinned TLB entry */ \ - li r10,MSR_IR; \ - mtmsr r10; \ - isync; \ - tophys(r1, r1); -#else -#define PPC_40x_TURN_OFF_MSR_DR -#endif #define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi) \ REST_NVGPRS(r1); \ lwz r3,_MSR(r1); \ andi. r3,r3,MSR_PR; \ bne interrupt_return; \ - lwz r0,GPR0(r1); \ - lwz r2,GPR2(r1); \ - REST_GPRS(3, 8, r1); \ + REST_GPR(0, r1); \ + REST_GPRS(2, 8, r1); \ lwz r10,_XER(r1); \ lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ @@ -465,7 +397,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) mtlr r11; \ lwz r10,_CCR(r1); \ mtcrf 0xff,r10; \ - PPC_40x_TURN_OFF_MSR_DR; \ lwz r9,_DEAR(r1); \ lwz r10,_ESR(r1); \ mtspr SPRN_DEAR,r9; \ @@ -474,11 +405,8 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) lwz r12,_MSR(r1); \ mtspr exc_lvl_srr0,r11; \ mtspr exc_lvl_srr1,r12; \ - lwz r9,GPR9(r1); \ - lwz r12,GPR12(r1); \ - lwz r10,GPR10(r1); \ - lwz r11,GPR11(r1); \ - lwz r1,GPR1(r1); \ + REST_GPRS(9, 12, r1); \ + REST_GPR(1, r1); \ exc_lvl_rfi; \ b .; /* prevent prefetch past exc_lvl_rfi */ @@ -488,7 +416,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) mtspr SPRN_##exc_lvl_srr0,r9; \ mtspr SPRN_##exc_lvl_srr1,r10; -#if defined(CONFIG_PPC_BOOK3E_MMU) +#if defined(CONFIG_PPC_E500) #ifdef CONFIG_PHYS_64BIT #define RESTORE_MAS7 \ lwz r11,MAS7(r1); \ @@ -516,20 +444,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) #define RESTORE_MMU_REGS #endif -#ifdef CONFIG_40x - .globl ret_from_crit_exc -ret_from_crit_exc: - lis r9,crit_srr0@ha; - lwz r9,crit_srr0@l(r9); - lis r10,crit_srr1@ha; - lwz r10,crit_srr1@l(r10); - mtspr SPRN_SRR0,r9; - mtspr SPRN_SRR1,r10; - RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) -_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) -#endif /* CONFIG_40x */ - -#ifdef CONFIG_BOOKE .globl ret_from_crit_exc ret_from_crit_exc: RESTORE_xSRR(SRR0,SRR1); @@ -554,4 +468,3 @@ ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ -#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 69a912550577..6a414ed5a411 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -3,6 +3,7 @@ * Copyright (C) 2012 Freescale Semiconductor, Inc. 
*/ +#include <linux/export.h> #include <linux/threads.h> #include <asm/epapr_hcalls.h> #include <asm/reg.h> @@ -12,7 +13,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> -#include <asm/export.h> #ifndef CONFIG_PPC64 /* epapr_ev_idle() was derived from e500_idle() */ @@ -21,7 +21,13 @@ _GLOBAL(epapr_ev_idle) ori r4, r4,_TLF_NAPPING /* so when we take an exception */ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */ +#ifdef CONFIG_BOOKE wrteei 1 +#else + mfmsr r4 + ori r4, r4, MSR_EE + mtmsr r4 +#endif idle_loop: LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index d4b8aff20815..247ab2acaccc 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -9,7 +9,7 @@ #include <linux/of_fdt.h> #include <asm/epapr_hcalls.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/machdep.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 67dc4e3179a0..63f6b9f513a4 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -5,6 +5,7 @@ * Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. */ +#include <linux/linkage.h> #include <linux/threads.h> #include <asm/reg.h> #include <asm/page.h> @@ -13,7 +14,6 @@ #include <asm/cputable.h> #include <asm/setup.h> #include <asm/thread_info.h> -#include <asm/reg_a2.h> #include <asm/exception-64e.h> #include <asm/bug.h> #include <asm/irqflags.h> @@ -66,7 +66,7 @@ #define SPECIAL_EXC_LOAD(reg, name) \ ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) -special_reg_save: +SYM_CODE_START_LOCAL(special_reg_save) /* * We only need (or have stack space) to save this stuff if * we interrupted the kernel. @@ -131,8 +131,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) SPECIAL_EXC_STORE(r10,CSRR1) blr +SYM_CODE_END(special_reg_save) -ret_from_level_except: +SYM_CODE_START_LOCAL(ret_from_level_except) ld r3,_MSR(r1) andi. 
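Context for the new #ifdef in epapr_ev_idle above: wrteei is a Book E-only instruction, so other 32-bit cores must enable interrupts with a full MSR read-modify-write. The three added instructions are the asm form of (using the kernel's mfmsr()/mtmsr() accessors from asm/reg.h):

	/* C equivalent of the mfmsr/ori/mtmsr fallback (32-bit only). */
	static inline void enable_irqs_fallback(void)
	{
		mtmsr(mfmsr() | MSR_EE);
	}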
r3,r3,MSR_PR beq 1f @@ -206,6 +207,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mtxer r11 blr +SYM_CODE_END(ret_from_level_except) .macro ret_from_level srr0 srr1 paca_ex scratch bl ret_from_level_except @@ -216,17 +218,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mtlr r10 mtcr r11 - ld r10,GPR10(r1) - ld r11,GPR11(r1) - ld r12,GPR12(r1) + REST_GPRS(10, 12, r1) mtspr \scratch,r0 std r10,\paca_ex+EX_R10(r13); std r11,\paca_ex+EX_R11(r13); ld r10,_NIP(r1) ld r11,_MSR(r1) - ld r0,GPR0(r1) - ld r1,GPR1(r1) + REST_GPR(0, r1) + REST_GPR(1, r1) mtspr \srr0,r10 mtspr \srr1,r11 ld r10,\paca_ex+EX_R10(r13) @@ -234,13 +234,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r13,\scratch .endm -ret_from_crit_except: +SYM_CODE_START_LOCAL(ret_from_crit_except) ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH rfci +SYM_CODE_END(ret_from_crit_except) -ret_from_mc_except: +SYM_CODE_START_LOCAL(ret_from_mc_except) ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH rfmci +SYM_CODE_END(ret_from_mc_except) /* Exception prolog code for all exceptions */ #define EXCEPTION_PROLOG(n, intnum, type, addition) \ @@ -291,7 +293,6 @@ ret_from_mc_except: #define SPRN_MC_SRR0 SPRN_MCSRR0 #define SPRN_MC_SRR1 SPRN_MCSRR1 -#ifdef CONFIG_PPC_FSL_BOOK3E #define GEN_BTB_FLUSH \ START_BTB_FLUSH_SECTION \ beq 1f; \ @@ -307,13 +308,6 @@ ret_from_mc_except: #define DBG_BTB_FLUSH CRIT_BTB_FLUSH #define MC_BTB_FLUSH CRIT_BTB_FLUSH #define GDBELL_BTB_FLUSH GEN_BTB_FLUSH -#else -#define GEN_BTB_FLUSH -#define CRIT_BTB_FLUSH -#define DBG_BTB_FLUSH -#define MC_BTB_FLUSH -#define GDBELL_BTB_FLUSH -#endif #define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \ EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n)) @@ -368,29 +362,27 @@ ret_from_mc_except: std r14,PACA_EXMC+EX_R14(r13); \ std r15,PACA_EXMC+EX_R15(r13) - /* Core exception code for all exceptions except TLB misses. 
*/ #define EXCEPTION_COMMON_LVL(n, scratch, excf) \ exc_##n##_common: \ - std r0,GPR0(r1); /* save r0 in stackframe */ \ - std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ + SAVE_GPR(0, r1); /* save r0 in stackframe */ \ + SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ 2: ld r3,excf+EX_R10(r13); /* get back r10 */ \ ld r4,excf+EX_R11(r13); /* get back r11 */ \ mfspr r5,scratch; /* get back r13 */ \ - std r12,GPR12(r1); /* save r12 in stackframe */ \ - ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + SAVE_GPR(12, r1); /* save r12 in stackframe */ \ + LOAD_PACA_TOC(); /* get kernel TOC into r2 */ \ mflr r6; /* save LR in stackframe */ \ mfctr r7; /* save CTR in stackframe */ \ mfspr r8,SPRN_XER; /* save XER in stackframe */ \ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \ lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \ - ld r12,exception_marker@toc(r2); \ - li r0,0; \ + LOAD_REG_IMMEDIATE(r12, STACK_FRAME_REGS_MARKER); \ + ZEROIZE_GPR(0); \ std r3,GPR10(r1); /* save r10 to stackframe */ \ std r4,GPR11(r1); /* save r11 to stackframe */ \ std r5,GPR13(r1); /* save it to stackframe */ \ @@ -402,10 +394,11 @@ exc_##n##_common: \ std r10,_CCR(r1); /* store orig CR in stackframe */ \ std r9,GPR1(r1); /* store stack frame back link */ \ std r11,SOFTE(r1); /* and save it to stackframe */ \ - std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \ std r3,_TRAP(r1); /* set trap number */ \ std r0,RESULT(r1); /* clear regs->result */ \ - SAVE_NVGPRS(r1); + SAVE_NVGPRS(r1); \ + SANITIZE_NVGPRS(); /* minimise speculation influence */ #define EXCEPTION_COMMON(n) \ EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN) @@ -466,16 +459,10 @@ exc_##n##_bad_stack: \ EXCEPTION_COMMON(trapnum) \ ack(r8); \ CHECK_NAPPING(); \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ + addi r3,r1,STACK_INT_FRAME_REGS; \ bl hdlr; \ b interrupt_return -/* This value is used to mark exception frames on the stack. */ - .section ".toc","aw" -exception_marker: - .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER - - /* * And here we have the exception vectors ! 
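The deleted ".toc exception_marker" entry is why the common code can switch to LOAD_REG_IMMEDIATE: the frame marker is just a constant, so materializing it from immediates removes a dependence on r2 holding a valid TOC at that point. For reference, the value stack unwinders look for is the ASCII tag:

	/* From asm/ptrace.h: the value stored at STACK_INT_FRAME_MARKER. */
	#define STACK_FRAME_REGS_MARKER	ASM_CONST(0x72656773)	/* "regs" */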
*/ @@ -498,8 +485,8 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */ EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ - EXCEPTION_STUB(0x1c0, data_tlb_miss) - EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x1c0, data_tlb_miss_bolted) + EXCEPTION_STUB(0x1e0, instruction_tlb_miss_bolted) EXCEPTION_STUB(0x200, altivec_unavailable) EXCEPTION_STUB(0x220, altivec_assist) EXCEPTION_STUB(0x260, perfmon) @@ -521,7 +508,7 @@ __end_interrupts: EXCEPTION_COMMON_CRIT(0x100) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -532,7 +519,7 @@ __end_interrupts: EXCEPTION_COMMON_MC(0x000) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl machine_check_exception b ret_from_mc_except @@ -587,7 +574,7 @@ __end_interrupts: std r14,_ESR(r1) ld r14,PACA_EXGEN+EX_R14(r13) EXCEPTION_COMMON(0x700) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl program_check_exception REST_NVGPRS(r1) b interrupt_return @@ -603,7 +590,7 @@ __end_interrupts: beq- 1f bl load_up_fpu b fast_interrupt_return -1: addi r3,r1,STACK_FRAME_OVERHEAD +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_fp_unavailable_exception b interrupt_return @@ -623,7 +610,7 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl altivec_unavailable_exception b interrupt_return @@ -633,7 +620,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) BOOKE_INTERRUPT_ALTIVEC_ASSIST, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x220) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION bl altivec_assist_exception @@ -660,7 +647,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) EXCEPTION_COMMON_CRIT(0x9f0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_BOOKE_WDT bl WatchdogException #else @@ -681,7 +668,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0xf20) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -704,9 +691,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) beq+ 1f #ifdef CONFIG_RELOCATABLE - ld r15,PACATOC(r13) - ld r14,interrupt_base_book3e@got(r15) - ld r15,__end_interrupts@got(r15) + __LOAD_PACA_TOC(r15) + LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e) + LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts) cmpld cr0,r10,r14 cmpld cr1,r10,r15 #else @@ -748,7 +735,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) ld r14,PACA_EXCRIT+EX_R14(r13) ld r15,PACA_EXCRIT+EX_R15(r13) EXCEPTION_COMMON_CRIT(0xd00) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl DebugException REST_NVGPRS(r1) b interrupt_return @@ -775,9 +762,9 @@ kernel_dbg_exc: beq+ 1f #ifdef CONFIG_RELOCATABLE - ld r15,PACATOC(r13) - ld r14,interrupt_base_book3e@got(r15) - ld r15,__end_interrupts@got(r15) + __LOAD_PACA_TOC(r15) + LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e) + LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts) cmpld cr0,r10,r14 cmpld cr1,r10,r15 #else @@ -819,7 +806,7 @@ kernel_dbg_exc: ld r14,PACA_EXDBG+EX_R14(r13) ld r15,PACA_EXDBG+EX_R15(r13) EXCEPTION_COMMON_DBG(0xd08) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl 
DebugException REST_NVGPRS(r1) b interrupt_return @@ -829,7 +816,14 @@ kernel_dbg_exc: PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260) CHECK_NAPPING() - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS + /* + * XXX: Returning from performance_monitor_exception taken as a + * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return + * and could cause bugs in return or elsewhere. That case should just + * restore registers and return. There is a workaround for one known + * problem in interrupt_exit_kernel_prepare(). + */ bl performance_monitor_exception b interrupt_return @@ -844,7 +838,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2a0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -856,7 +850,7 @@ kernel_dbg_exc: GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x2c0) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -867,7 +861,7 @@ kernel_dbg_exc: EXCEPTION_COMMON_CRIT(0x2e0) bl special_reg_save CHECK_NAPPING(); - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_nmi_exception b ret_from_crit_except @@ -876,7 +870,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x310) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -885,7 +879,7 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x320) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return @@ -894,15 +888,15 @@ kernel_dbg_exc: NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x340) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS bl unknown_exception b interrupt_return .macro SEARCH_RESTART_TABLE #ifdef CONFIG_RELOCATABLE - ld r11,PACATOC(r13) - ld r14,__start___restart_table@got(r11) - ld r15,__stop___restart_table@got(r11) + __LOAD_PACA_TOC(r11) + LOAD_REG_ADDR_ALTTOC(r14, r11, __start___restart_table) + LOAD_REG_ADDR_ALTTOC(r15, r11, __stop___restart_table) #else LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table) LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table) @@ -988,20 +982,22 @@ masked_interrupt_book3e_0x2c0: * r14 and r15 containing the fault address and error code, with the * original values stashed away in the PACA */ -storage_fault_common: - addi r3,r1,STACK_FRAME_OVERHEAD +SYM_CODE_START_LOCAL(storage_fault_common) + addi r3,r1,STACK_INT_FRAME_REGS bl do_page_fault b interrupt_return +SYM_CODE_END(storage_fault_common) /* * Alignment exception doesn't fit entirely in the 0x100 bytes so it * continues here. 
*/ -alignment_more: - addi r3,r1,STACK_FRAME_OVERHEAD +SYM_CODE_START_LOCAL(alignment_more) + addi r3,r1,STACK_INT_FRAME_REGS bl alignment_exception REST_NVGPRS(r1) b interrupt_return +SYM_CODE_END(alignment_more) /* * Trampolines used when spotting a bad kernel stack pointer in @@ -1040,8 +1036,7 @@ BAD_STACK_TRAMPOLINE(0xe00) BAD_STACK_TRAMPOLINE(0xf00) BAD_STACK_TRAMPOLINE(0xf20) - .globl bad_stack_book3e -bad_stack_book3e: +_GLOBAL(bad_stack_book3e) /* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */ mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */ ld r1,PACAEMERGSP(r13) @@ -1056,15 +1051,14 @@ bad_stack_book3e: mfspr r11,SPRN_ESR std r10,_DEAR(r1) std r11,_ESR(r1) - std r0,GPR0(r1); /* save r0 in stackframe */ \ - std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ + SAVE_GPR(0, r1); /* save r0 in stackframe */ \ + SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ std r3,GPR10(r1); /* save r10 to stackframe */ \ std r4,GPR11(r1); /* save r11 to stackframe */ \ - std r12,GPR12(r1); /* save r12 in stackframe */ \ + SAVE_GPR(12, r1); /* save r12 in stackframe */ \ std r5,GPR13(r1); /* save it to stackframe */ \ mflr r10 mfctr r11 @@ -1072,15 +1066,15 @@ bad_stack_book3e: std r10,_LINK(r1) std r11,_CTR(r1) std r12,_XER(r1) - SAVE_GPRS(14, 31, r1) + SAVE_NVGPRS(r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) addi r11,r1,INT_FRAME_SIZE std r11,0(r1) - li r12,0 + ZEROIZE_GPR(12) std r12,0(r11) - ld r2,PACATOC(r13) -1: addi r3,r1,STACK_FRAME_OVERHEAD + LOAD_PACA_TOC() +1: addi r3,r1,STACK_INT_FRAME_REGS bl kernel_bad_stack b 1b @@ -1296,8 +1290,7 @@ have_hes: * ever takes any parameters, the SCOM code must also be updated to * provide them. 
*/ - .globl a2_tlbinit_code_start -a2_tlbinit_code_start: +_GLOBAL(a2_tlbinit_code_start) ori r11,r3,MAS0_WQ_ALLWAYS oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */ @@ -1320,8 +1313,8 @@ a2_tlbinit_after_linear_map: /* Now we branch the new virtual address mapped by this entry */ #ifdef CONFIG_RELOCATABLE - ld r5,PACATOC(r13) - ld r3,1f@got(r5) + __LOAD_PACA_TOC(r5) + LOAD_REG_ADDR_ALTTOC(r3, r5, 1f) #else LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f) #endif @@ -1490,8 +1483,7 @@ _GLOBAL(book3e_secondary_thread_init) mflr r28 b 3b - .globl init_core_book3e -init_core_book3e: +_GLOBAL(init_core_book3e) /* Establish the interrupt vector base */ tovirt(r2,r2) LOAD_REG_ADDR(r3, interrupt_base_book3e) @@ -1499,7 +1491,7 @@ init_core_book3e: sync blr -init_thread_book3e: +SYM_CODE_START_LOCAL(init_thread_book3e) lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h mtspr SPRN_EPCR,r3 @@ -1513,6 +1505,7 @@ init_thread_book3e: mtspr SPRN_TSR,r3 blr +SYM_CODE_END(init_thread_book3e) _GLOBAL(__setup_base_ivors) SET_IVOR(0, 0x020) /* Critical Input */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b66dd6f775a4..b7229430ca94 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -13,6 +13,7 @@ * */ +#include <linux/linkage.h> #include <asm/hw_irq.h> #include <asm/exception-64s.h> #include <asm/ptrace.h> @@ -111,6 +112,7 @@ name: #define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */ #define __ISTACK(name) .L_ISTACK_ ## name #define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */ +#define IMSR_R12 .L_IMSR_R12_\name\() /* Assumes MSR saved to r12 */ #define INT_DEFINE_BEGIN(n) \ .macro int_define_ ## n name @@ -176,6 +178,9 @@ do_define_int n .ifndef IKUAP IKUAP=1 .endif + .ifndef IMSR_R12 + IMSR_R12=0 + .endif .endm /* @@ -281,7 +286,7 @@ BEGIN_FTR_SECTION mfspr r9,SPRN_PPR END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM - std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ + std r10,IAREA+EX_R10(r13) /* save r10 */ .if ICFAR BEGIN_FTR_SECTION mfspr r10,SPRN_CFAR @@ -321,7 +326,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mfctr r10 std r10,IAREA+EX_CTR(r13) mfcr r9 - std r11,IAREA+EX_R11(r13) + std r11,IAREA+EX_R11(r13) /* save r11 - r12 */ std r12,IAREA+EX_R12(r13) /* @@ -502,6 +507,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text) std r10,0(r1) /* make stack chain pointer */ std r0,GPR0(r1) /* save r0 in stackframe */ std r10,GPR1(r1) /* save r1 in stackframe */ + SANITIZE_GPR(0) /* Mark our [H]SRRs valid for return */ li r10,1 @@ -544,8 +550,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r9,GPR11(r1) std r10,GPR12(r1) std r11,GPR13(r1) + .if !IMSR_R12 + SANITIZE_GPRS(9, 12) + .else + SANITIZE_GPRS(9, 11) + .endif SAVE_NVGPRS(r1) + SANITIZE_NVGPRS() .if IDAR .if IISIDE @@ -577,10 +589,10 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) - std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ + SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe */ + SANITIZE_GPRS(2, 8) mflr r9 /* Get LR, later save to stack */ - ld r2,PACATOC(r13) /* get kernel TOC into r2 */ + LOAD_PACA_TOC() /* get kernel TOC into r2 */ std r9,_LINK(r1) lbz r10,PACAIRQSOFTMASK(r13) mfspr r11,SPRN_XER /* save XER in stackframe */ @@ -589,9 +601,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) li r9,IVEC std r9,_TRAP(r1) /* set trap number */ li r10,0 - ld r11,exception_marker@toc(r2) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) std r10,RESULT(r1) /* 
clear regs->result */ - std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */ + std r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame */ .endm /* @@ -610,7 +622,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .macro SEARCH_RESTART_TABLE #ifdef CONFIG_RELOCATABLE mr r12,r2 - ld r2,PACATOC(r13) + LOAD_PACA_TOC() LOAD_REG_ADDR(r9, __start___restart_table) LOAD_REG_ADDR(r10, __stop___restart_table) mr r2,r12 @@ -640,7 +652,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .macro SEARCH_SOFT_MASK_TABLE #ifdef CONFIG_RELOCATABLE mr r12,r2 - ld r2,PACATOC(r13) + LOAD_PACA_TOC() LOAD_REG_ADDR(r9, __start___soft_mask_table) LOAD_REG_ADDR(r10, __stop___soft_mask_table) mr r2,r12 @@ -696,6 +708,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 + SANITIZE_RESTORE_NVGPRS() REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ @@ -703,6 +716,71 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) .endm /* + * EARLY_BOOT_FIXUP - Fix real-mode interrupt with wrong endian in early boot. + * + * There's a short window during boot where although the kernel is running + * little endian, any exceptions will cause the CPU to switch back to big + * endian. For example a WARN() boils down to a trap instruction, which will + * cause a program check, and we end up here but with the CPU in big endian + * mode. The first instruction of the program check handler (in GEN_INT_ENTRY + * below) is an mtsprg, which when executed in the wrong endian is an lhzu with + * a ~3GB displacement from r3. The content of r3 is random, so that is a load + * from some random location, and depending on the system can easily lead to a + * checkstop, or an infinitely recursive page fault. + * + * So to handle that case we have a trampoline here that can detect we are in + * the wrong endian and flip us back to the correct endian. We can't flip + * MSR[LE] using mtmsr, so we have to use rfid. That requires backing up SRR0/1 + * as well as a GPR. To do that we use SPRG0/2/3, as SPRG1 is already used for + * the paca. SPRG3 is user readable, but this trampoline is only active very + * early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before + * userspace starts. + */ +.macro EARLY_BOOT_FIXUP +BEGIN_FTR_SECTION +#ifdef CONFIG_CPU_LITTLE_ENDIAN + tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8 + b 2f // Skip trampoline if endian is correct + .long 0xa643707d // mtsprg 0, r11 Backup r11 + .long 0xa6027a7d // mfsrr0 r11 + .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2 + .long 0xa6027b7d // mfsrr1 r11 + .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3 + .long 0xa600607d // mfmsr r11 + .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE] + .long 0xa6037b7d // mtsrr1 r11 + /* + * This is 'li r11,1f' where 1f is the absolute address of that + * label, byteswapped into the SI field of the instruction. + */ + .long 0x00006039 | \ + ((ABS_ADDR(1f, real_vectors) & 0x00ff) << 24) | \ + ((ABS_ADDR(1f, real_vectors) & 0xff00) << 8) + .long 0xa6037a7d // mtsrr0 r11 + .long 0x2400004c // rfid +1: + mfsprg r11, 3 + mtsrr1 r11 // Restore SRR1 + mfsprg r11, 2 + mtsrr0 r11 // Restore SRR0 + mfsprg r11, 0 // Restore r11 +2: +#endif + /* + * program check could hit at any time, and pseries can not block + * MSR[ME] in early boot. So check if there is anything useful in r13 + * yet, and spin forever if not. + */ + mtsprg 0, r11 + mfcr r11 + cmpdi r13, 0 + beq . + mtcr r11 + mfsprg r11, 0 +END_FTR_SECTION(0, 1) // nop out after boot +.endm + +/* * There are a few constraints to be concerned with. 
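The relocated "li r11,1f" is the subtle part of EARLY_BOOT_FIXUP above: executed in the wrong endian, the CPU fetches each instruction word byte-reversed, so the stored word must be the byte swap of the instruction actually wanted, with the branch target spliced into what will be read back as the SI field. A host-side check of that arithmetic, using 0x734 (the target hard-coded in the old program-check copy of this trampoline) as the example address:

	#include <assert.h>
	#include <stdint.h>

	static uint32_t bswap32(uint32_t x)
	{
		return (x >> 24) | ((x >> 8) & 0xff00) |
		       ((x << 8) & 0xff0000) | (x << 24);
	}

	int main(void)
	{
		uint32_t target = 0x0734;		/* example 16-bit target */
		uint32_t li_r11 = 0x39600000 | target;	/* "li r11, target" */
		uint32_t stored = 0x00006039 |
				  ((target & 0x00ff) << 24) |
				  ((target & 0xff00) << 8);	/* the macro's .long */

		assert(bswap32(stored) == li_r11);	/* byte-reversed fetch */
		return 0;
	}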
* - Real mode exceptions code/data must be located at their physical location. * - Virtual mode exceptions must be mapped at their 0xc000... location. @@ -815,7 +893,7 @@ __start_interrupts: * * Call convention: * - * syscall register convention is in Documentation/powerpc/syscall64-abi.rst + * syscall register convention is in Documentation/arch/powerpc/syscall64-abi.rst */ EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000) /* SCV 0 */ @@ -996,8 +1074,8 @@ EXC_COMMON_BEGIN(system_reset_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY system_reset - addi r3,r1,STACK_FRAME_OVERHEAD - bl system_reset_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(system_reset_exception) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 @@ -1079,6 +1157,7 @@ INT_DEFINE_BEGIN(machine_check) INT_DEFINE_END(machine_check) EXC_REAL_BEGIN(machine_check, 0x200, 0x100) + EARLY_BOOT_FIXUP GEN_INT_ENTRY machine_check_early, virt=0 EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) @@ -1142,8 +1221,11 @@ EXC_COMMON_BEGIN(machine_check_early_common) BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_early + addi r3,r1,STACK_INT_FRAME_REGS +BEGIN_FTR_SECTION + bl CFUNC(machine_check_early_boot) +END_FTR_SECTION(0, 1) // nop out after boot + bl CFUNC(machine_check_early) std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) @@ -1204,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) * Queue up the MCE event so that we can log it later, while * returning from kernel or opal call. */ - bl machine_check_queue_event + bl CFUNC(machine_check_queue_event) MACHINE_CHECK_HANDLER_WINDUP RFI_TO_KERNEL @@ -1229,8 +1311,8 @@ EXC_COMMON_BEGIN(machine_check_common) * save area: PACA_EXMC instead of PACA_EXGEN. */ GEN_COMMON machine_check - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception_async + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(machine_check_exception_async) b interrupt_return_srr @@ -1240,7 +1322,7 @@ EXC_COMMON_BEGIN(machine_check_common) * done. Queue the event then call the idle code to do the wake up. */ EXC_COMMON_BEGIN(machine_check_idle_common) - bl machine_check_queue_event + bl CFUNC(machine_check_queue_event) /* * GPR-loss wakeups are relatively straightforward, because the @@ -1279,7 +1361,7 @@ EXC_COMMON_BEGIN(unrecoverable_mce) BEGIN_FTR_SECTION li r10,0 /* clear MSR_RI */ mtmsrd r10,1 - bl disable_machine_check + bl CFUNC(disable_machine_check) END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ld r10,PACAKMSR(r13) li r3,MSR_ME @@ -1295,15 +1377,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * This is the NMI version of the handler because we are called from * the early handler which is a true NMI. */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(machine_check_exception) /* * We will not reach here. Even if we did, there is no way out. * Call unrecoverable_exception and die. */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(unrecoverable_exception) b . @@ -1353,26 +1435,26 @@ EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS andis. 
r0,r4,DSISR_DABRMATCH@h bne- 1f #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION - bl do_hash_fault + bl CFUNC(do_hash_fault) MMU_FTR_SECTION_ELSE - bl do_page_fault + bl CFUNC(do_page_fault) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #else - bl do_page_fault + bl CFUNC(do_page_fault) #endif b interrupt_return_srr -1: bl do_break +1: bl CFUNC(do_break) /* * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. */ - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() b interrupt_return_srr @@ -1410,8 +1492,8 @@ EXC_COMMON_BEGIN(data_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_slb_fault + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(do_slb_fault) cmpdi r3,0 bne- 1f b fast_interrupt_return_srr @@ -1424,8 +1506,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_segment_interrupt + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(do_bad_segment_interrupt) b interrupt_return_srr @@ -1456,15 +1538,15 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION - bl do_hash_fault + bl CFUNC(do_hash_fault) MMU_FTR_SECTION_ELSE - bl do_page_fault + bl CFUNC(do_page_fault) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #else - bl do_page_fault + bl CFUNC(do_page_fault) #endif b interrupt_return_srr @@ -1498,8 +1580,8 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_slb_fault + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(do_slb_fault) cmpdi r3,0 bne- 1f b fast_interrupt_return_srr @@ -1512,8 +1594,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) li r3,-EFAULT #endif std r3,RESULT(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_segment_interrupt + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(do_bad_segment_interrupt) b interrupt_return_srr @@ -1566,8 +1648,8 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_IRQ + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(do_IRQ) BEGIN_FTR_SECTION b interrupt_return_hsrr FTR_SECTION_ELSE @@ -1596,9 +1678,9 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100) EXC_VIRT_END(alignment, 0x4600, 0x100) EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment - addi r3,r1,STACK_FRAME_OVERHEAD - bl alignment_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(alignment_exception) + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1619,51 +1701,7 @@ INT_DEFINE_BEGIN(program_check) INT_DEFINE_END(program_check) EXC_REAL_BEGIN(program_check, 0x700, 0x100) - -#ifdef CONFIG_CPU_LITTLE_ENDIAN - /* - * There's a short window during boot where although the kernel is - * running little endian, any exceptions will cause the CPU to switch - * back to big endian. For example a WARN() boils down to a trap - * instruction, which will cause a program check, and we end up here but - * with the CPU in big endian mode. 
The first instruction of the program - * check handler (in GEN_INT_ENTRY below) is an mtsprg, which when - * executed in the wrong endian is an lhzu with a ~3GB displacement from - * r3. The content of r3 is random, so that is a load from some random - * location, and depending on the system can easily lead to a checkstop, - * or an infinitely recursive page fault. - * - * So to handle that case we have a trampoline here that can detect we - * are in the wrong endian and flip us back to the correct endian. We - * can't flip MSR[LE] using mtmsr, so we have to use rfid. That requires - * backing up SRR0/1 as well as a GPR. To do that we use SPRG0/2/3, as - * SPRG1 is already used for the paca. SPRG3 is user readable, but this - * trampoline is only active very early in boot, and SPRG3 will be - * reinitialised in vdso_getcpu_init() before userspace starts. - */ -BEGIN_FTR_SECTION - tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8 - b 1f // Skip trampoline if endian is correct - .long 0xa643707d // mtsprg 0, r11 Backup r11 - .long 0xa6027a7d // mfsrr0 r11 - .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2 - .long 0xa6027b7d // mfsrr1 r11 - .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3 - .long 0xa600607d // mfmsr r11 - .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE] - .long 0xa6037b7d // mtsrr1 r11 - .long 0x34076039 // li r11, 0x734 - .long 0xa6037a7d // mtsrr0 r11 - .long 0x2400004c // rfid - mfsprg r11, 3 - mtsrr1 r11 // Restore SRR1 - mfsprg r11, 2 - mtsrr0 r11 // Restore SRR0 - mfsprg r11, 0 // Restore r11 -1: -END_FTR_SECTION(0, 1) // nop out after boot -#endif /* CONFIG_CPU_LITTLE_ENDIAN */ - + EARLY_BOOT_FIXUP GEN_INT_ENTRY program_check, virt=0 EXC_REAL_END(program_check, 0x700, 0x100) EXC_VIRT_BEGIN(program_check, 0x4700, 0x100) @@ -1706,9 +1744,9 @@ EXC_COMMON_BEGIN(program_check_common) __GEN_COMMON_BODY program_check .Ldo_program_check: - addi r3,r1,STACK_FRAME_OVERHEAD - bl program_check_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(program_check_exception) + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1726,6 +1764,7 @@ INT_DEFINE_BEGIN(fp_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(fp_unavailable) EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100) @@ -1737,8 +1776,8 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100) EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl kernel_fp_unavailable_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(kernel_fp_unavailable_exception) 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 1: @@ -1751,12 +1790,12 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif - bl load_up_fpu + bl CFUNC(load_up_fpu) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl fp_unavailable_tm + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(fp_unavailable_tm) b interrupt_return_srr #endif @@ -1799,8 +1838,8 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80) EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer - addi r3,r1,STACK_FRAME_OVERHEAD - bl timer_interrupt + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(timer_interrupt) b interrupt_return_srr @@ -1884,11 +1923,11 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100) 
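A note on the endian trampoline that the program_check hunk above folds into EARLY_BOOT_FIXUP: the trick works because a fixed-width instruction fetched in the wrong endianness decodes as a different, but still predictable, instruction. A minimal stand-alone C sketch (the swab32() helper here is local, not the kernel's) verifying the encoding pair the removed comment describes:

#include <stdint.h>
#include <stdio.h>

/* Byte-reverse a 32-bit word, as a wrong-endian instruction fetch does. */
static uint32_t swab32(uint32_t x)
{
	return (x >> 24) | ((x >> 8) & 0x0000ff00) |
	       ((x << 8) & 0x00ff0000) | (x << 24);
}

int main(void)
{
	uint32_t tdi = 0x08000048;	/* tdi 0,0,0x48: "trap never", a no-op */

	/* Prints 0x48000008, i.e. "b .+8": a correct-endian CPU falls
	 * through the no-op and takes the "b 1f" that skips the
	 * trampoline, while a wrong-endian CPU branches over that skip
	 * and runs the endian-flipping sequence. */
	printf("0x%08x\n", swab32(tdi));
	return 0;
}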
EXC_VIRT_END(doorbell_super, 0x4a00, 0x100) EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL - bl doorbell_exception + bl CFUNC(doorbell_exception) #else - bl unknown_async_exception + bl CFUNC(unknown_async_exception) #endif b interrupt_return_srr @@ -1913,8 +1952,8 @@ EXC_VIRT_NONE(0x4b00, 0x100) * Call convention: * * syscall and hypercalls register conventions are documented in - * Documentation/powerpc/syscall64-abi.rst and - * Documentation/powerpc/papr_hcalls.rst respectively. + * Documentation/arch/powerpc/syscall64-abi.rst and + * Documentation/arch/powerpc/papr_hcalls.rst respectively. * * The intersection of volatile registers that don't contain possible * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry @@ -1950,13 +1989,6 @@ INT_DEFINE_END(system_call) INTERRUPT_TO_KERNEL #endif -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH -BEGIN_FTR_SECTION - cmpdi r0,0x1ebe - beq- 1f -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) -#endif - /* We reach here with PACA in r13, r13 in r9. */ mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 @@ -1976,16 +2008,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b system_call_common #endif .endif - -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH - /* Fast LE/BE switch system call */ -1: mfspr r12,SPRN_SRR1 - xori r12,r12,MSR_LE - mtspr SPRN_SRR1,r12 - mr r13,r9 - RFI_TO_USER /* return to userspace */ - b . /* prevent speculative execution */ -#endif .endm EXC_REAL_BEGIN(system_call, 0xc00, 0x100) @@ -2051,8 +2073,8 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100) EXC_VIRT_END(single_step, 0x4d00, 0x100) EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step - addi r3,r1,STACK_FRAME_OVERHEAD - bl single_step_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(single_step_exception) b interrupt_return_srr @@ -2085,11 +2107,11 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20) EXC_VIRT_END(h_data_storage, 0x4e00, 0x20) EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS BEGIN_MMU_FTR_SECTION - bl do_bad_page_fault_segv + bl CFUNC(do_bad_page_fault_segv) MMU_FTR_SECTION_ELSE - bl unknown_exception + bl CFUNC(unknown_exception) ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b interrupt_return_hsrr @@ -2114,8 +2136,8 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20) EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage - addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(unknown_exception) b interrupt_return_hsrr @@ -2137,9 +2159,9 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20) EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist - addi r3,r1,STACK_FRAME_OVERHEAD - bl emulation_assist_interrupt - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(emulation_assist_interrupt) + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2197,8 +2219,8 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) __GEN_COMMON_BODY hmi_exception_early - addi r3,r1,STACK_FRAME_OVERHEAD - bl hmi_exception_realmode + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(hmi_exception_realmode) cmpdi cr0,r3,0 bne 1f @@ -2215,8 +2237,8 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception - addi 
r3,r1,STACK_FRAME_OVERHEAD - bl handle_hmi_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(handle_hmi_exception) b interrupt_return_hsrr @@ -2249,11 +2271,11 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20) EXC_VIRT_END(h_doorbell, 0x4e80, 0x20) EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL - bl doorbell_exception + bl CFUNC(doorbell_exception) #else - bl unknown_async_exception + bl CFUNC(unknown_async_exception) #endif b interrupt_return_hsrr @@ -2285,8 +2307,8 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20) EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_IRQ + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(do_IRQ) b interrupt_return_hsrr @@ -2331,10 +2353,22 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20) EXC_VIRT_END(performance_monitor, 0x4f00, 0x20) EXC_COMMON_BEGIN(performance_monitor_common) GEN_COMMON performance_monitor - addi r3,r1,STACK_FRAME_OVERHEAD - bl performance_monitor_exception + addi r3,r1,STACK_INT_FRAME_REGS + lbz r4,PACAIRQSOFTMASK(r13) + cmpdi r4,IRQS_ENABLED + bne 1f + bl CFUNC(performance_monitor_exception_async) b interrupt_return_srr +1: + bl CFUNC(performance_monitor_exception_nmi) + /* Clear MSR_RI before setting SRR0 and SRR1. */ + li r9,0 + mtmsrd r9,1 + kuap_kernel_restore r9, r10 + + EXCEPTION_RESTORE_REGS hsrr=0 + RFI_TO_KERNEL /** * Interrupt 0xf20 - Vector Unavailable Interrupt. @@ -2347,6 +2381,7 @@ INT_DEFINE_BEGIN(altivec_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(altivec_unavailable) EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20) @@ -2369,19 +2404,19 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif - bl load_up_altivec + bl CFUNC(load_up_altivec) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl altivec_unavailable_tm + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(altivec_unavailable_tm) b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - addi r3,r1,STACK_FRAME_OVERHEAD - bl altivec_unavailable_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(altivec_unavailable_exception) b interrupt_return_srr @@ -2396,6 +2431,7 @@ INT_DEFINE_BEGIN(vsx_unavailable) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + IMSR_R12=1 INT_DEFINE_END(vsx_unavailable) EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20) @@ -2421,15 +2457,15 @@ BEGIN_FTR_SECTION b load_up_vsx #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ - addi r3,r1,STACK_FRAME_OVERHEAD - bl vsx_unavailable_tm + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(vsx_unavailable_tm) b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif - addi r3,r1,STACK_FRAME_OVERHEAD - bl vsx_unavailable_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(vsx_unavailable_exception) b interrupt_return_srr @@ -2455,9 +2491,9 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20) EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD - bl facility_unavailable_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(facility_unavailable_exception) + HANDLER_RESTORE_NVGPRS() /* instruction emulation may 
change GPRs */ b interrupt_return_srr @@ -2483,9 +2519,10 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20) EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable - addi r3,r1,STACK_FRAME_OVERHEAD - bl facility_unavailable_exception - REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */ + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(facility_unavailable_exception) + /* XXX Shouldn't be necessary in practice */ + HANDLER_RESTORE_NVGPRS() b interrupt_return_hsrr @@ -2500,27 +2537,8 @@ EXC_REAL_NONE(0x1000, 0x100) EXC_VIRT_NONE(0x5000, 0x100) EXC_REAL_NONE(0x1100, 0x100) EXC_VIRT_NONE(0x5100, 0x100) - -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_system_error) - IVEC=0x1200 - IHSRR=1 -INT_DEFINE_END(cbe_system_error) - -EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) - GEN_INT_ENTRY cbe_system_error, virt=0 -EXC_REAL_END(cbe_system_error, 0x1200, 0x100) -EXC_VIRT_NONE(0x5200, 0x100) -EXC_COMMON_BEGIN(cbe_system_error_common) - GEN_COMMON cbe_system_error - addi r3,r1,STACK_FRAME_OVERHEAD - bl cbe_system_error_exception - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) -#endif /** * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt. @@ -2544,8 +2562,8 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100) EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint - addi r3,r1,STACK_FRAME_OVERHEAD - bl instruction_breakpoint_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(instruction_breakpoint_exception) b interrupt_return_srr @@ -2666,31 +2684,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception - addi r3,r1,STACK_FRAME_OVERHEAD - bl unknown_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(unknown_exception) b interrupt_return_hsrr -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_maintenance) - IVEC=0x1600 - IHSRR=1 -INT_DEFINE_END(cbe_maintenance) - -EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) - GEN_INT_ENTRY cbe_maintenance, virt=0 -EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) -EXC_VIRT_NONE(0x5600, 0x100) -EXC_COMMON_BEGIN(cbe_maintenance_common) - GEN_COMMON cbe_maintenance - addi r3,r1,STACK_FRAME_OVERHEAD - bl cbe_maintenance_exception - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) -#endif INT_DEFINE_BEGIN(altivec_assist) @@ -2708,36 +2708,18 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100) EXC_VIRT_END(altivec_assist, 0x5700, 0x100) EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC - bl altivec_assist_exception - REST_NVGPRS(r1) /* instruction emulation may change GPRs */ + bl CFUNC(altivec_assist_exception) + HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ #else - bl unknown_exception + bl CFUNC(unknown_exception) #endif b interrupt_return_srr -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_thermal) - IVEC=0x1800 - IHSRR=1 -INT_DEFINE_END(cbe_thermal) - -EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) - GEN_INT_ENTRY cbe_thermal, virt=0 -EXC_REAL_END(cbe_thermal, 0x1800, 0x100) -EXC_VIRT_NONE(0x5800, 0x100) -EXC_COMMON_BEGIN(cbe_thermal_common) - GEN_COMMON cbe_thermal - addi r3,r1,STACK_FRAME_OVERHEAD - bl cbe_thermal_exception - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ 
EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -#endif #ifdef CONFIG_PPC_WATCHDOG @@ -2763,8 +2745,8 @@ EXC_COMMON_BEGIN(soft_nmi_common) subi r1,r1,INT_FRAME_SIZE __GEN_COMMON_BODY soft_nmi - addi r3,r1,STACK_FRAME_OVERHEAD - bl soft_nmi_interrupt + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(soft_nmi_interrupt) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 @@ -2779,7 +2761,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: - * - If it was a decrementer interrupt, we bump the dec to max and and return. + * - If it was a decrementer interrupt, we bump the dec to max and return. * - If it was a doorbell we return immediately since doorbells are edge * triggered and won't automatically refire. * - If it was a HMI we return immediately since we handled it in realmode @@ -2794,6 +2776,20 @@ masked_Hinterrupt: masked_interrupt: .endif stw r9,PACA_EXGEN+EX_CCR(r13) +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + /* + * Ensure there was no previous MUST_HARD_MASK interrupt or + * HARD_DIS setting. If this does fire, the interrupt is still + * masked and MSR[EE] will be cleared on return, so no need to + * panic, but somebody probably enabled MSR[EE] under + * PACA_IRQ_HARD_DIS, mtmsr(mfmsr() | MSR_x) being a common + * cause. + */ + lbz r9,PACAIRQHAPPENED(r13) + andi. r9,r9,(PACA_IRQ_MUST_HARD_MASK|PACA_IRQ_HARD_DIS) +0: tdnei r9,0 + EMIT_WARN_ENTRY 0b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif lbz r9,PACAIRQHAPPENED(r13) or r9,r9,r10 stb r9,PACAIRQHAPPENED(r13) @@ -3034,22 +3030,6 @@ EXPORT_SYMBOL(do_uaccess_flush) MASKED_INTERRUPT MASKED_INTERRUPT hsrr=1 - /* - * Relocation-on interrupts: A subset of the interrupts can be delivered - * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering - * it. Addresses are the same as the original interrupt addresses, but - * offset by 0xc000000000004000. - * It's impossible to receive interrupts below 0x300 via this mechanism. - * KVM: None of these traps are from the guest ; anything that escalated - * to HV=1 from HV=0 is delivered via real mode handlers. - */ - - /* - * This uses the standard macro, since the original 0x300 vector - * only has extra guff for STAB-based processors -- which never - * come here. - */ - USE_FIXED_SECTION(virt_trampolines) /* * All code below __end_soft_masked is treated as soft-masked. 
If @@ -3075,7 +3055,7 @@ CLOSE_FIXED_SECTION(virt_trampolines); USE_TEXT_SECTION() /* MSR[RI] should be clear because this uses SRR[01] */ -enable_machine_check: +_GLOBAL(enable_machine_check) mflr r0 bcl 20,31,$+4 0: mflr r3 @@ -3089,7 +3069,7 @@ enable_machine_check: blr /* MSR[RI] should be clear because this uses SRR[01] */ -disable_machine_check: +SYM_FUNC_START_LOCAL(disable_machine_check) mflr r0 bcl 20,31,$+4 0: mflr r3 @@ -3102,3 +3082,4 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr +SYM_FUNC_END(disable_machine_check) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ea0a073abd96..5782e743fd27 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -33,6 +33,7 @@ #include <asm/fadump-internal.h> #include <asm/setup.h> #include <asm/interrupt.h> +#include <asm/prom.h> /* * The CPU who acquired the lock to trigger the fadump crash should @@ -53,8 +54,6 @@ static struct kobject *fadump_kobj; static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; - #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) @@ -80,26 +79,38 @@ static struct cma *fadump_cma; * But for some reason even if it fails we still have the memory reservation * with us and we can still continue doing fadump. */ -static int __init fadump_cma_init(void) +void __init fadump_cma_init(void) { - unsigned long long base, size; + unsigned long long base, size, end; int rc; - if (!fw_dump.fadump_enabled) - return 0; - + if (!fw_dump.fadump_supported || !fw_dump.fadump_enabled || + fw_dump.dump_active) + return; /* * Do not use CMA if user has provided fadump=nocma kernel parameter. - * Return 1 to continue with fadump old behaviour. */ - if (fw_dump.nocma) - return 1; + if (fw_dump.nocma || !fw_dump.boot_memory_size) + return; + /* + * [base, end) should be reserved during early init in + * fadump_reserve_mem(). No need to check this here as + * cma_init_reserved_mem() already checks for overlap. + * Here we give the aligned chunk of this reserved memory to CMA. + */ base = fw_dump.reserve_dump_area_start; size = fw_dump.boot_memory_size; + end = base + size; - if (!size) - return 0; + base = ALIGN(base, CMA_MIN_ALIGNMENT_BYTES); + end = ALIGN_DOWN(end, CMA_MIN_ALIGNMENT_BYTES); + size = end - base; + + if (end <= base) { + pr_warn("%s: Too little memory to give to CMA\n", __func__); + return; + } rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); if (rc) { @@ -110,7 +121,7 @@ static int __init fadump_cma_init(void) * blocked from production system usage. Hence just return, * so that we can continue with fadump. */ - return 1; + return; } /* * So we now have successfully initialized cma area for fadump.
*/ - pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " + pr_info("Initialized [0x%llx, %luMB] cma area from [0x%lx, %luMB] " "bytes of memory reserved for firmware-assisted dump\n", - cma_get_size(fadump_cma), - (unsigned long)cma_get_base(fadump_cma) >> 20, - fw_dump.reserve_dump_area_size); - return 1; + cma_get_base(fadump_cma), cma_get_size(fadump_cma) >> 20, + fw_dump.reserve_dump_area_start, + fw_dump.boot_memory_size >> 20); + return; } -#else -static int __init fadump_cma_init(void) { return 1; } #endif /* CONFIG_CMA */ +/* + * Additional parameters meant for capture kernel are placed in a dedicated area. + * If this is capture kernel boot, append these parameters to bootargs. + */ +void __init fadump_append_bootargs(void) +{ + char *append_args; + size_t len; + + if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) + return; + + if (fw_dump.param_area < fw_dump.boot_mem_top) { + if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { + pr_warn("WARNING: Can't use additional parameters area!\n"); + fw_dump.param_area = 0; + return; + } + } + + append_args = (char *)fw_dump.param_area; + len = strlen(boot_command_line); + + /* + * Too late to fail even if cmdline size exceeds. Truncate additional parameters + * to cmdline size and proceed anyway. + */ + if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1) + pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n"); + + pr_debug("Cmdline: %s\n", boot_command_line); + snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args); + pr_info("Updated cmdline: %s\n", boot_command_line); +} + /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) @@ -223,28 +267,6 @@ static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) } /* - * Returns true, if there are no holes in boot memory area, - * false otherwise. - */ -bool is_fadump_boot_mem_contiguous(void) -{ - unsigned long d_start, d_end; - bool ret = false; - int i; - - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - d_start = fw_dump.boot_mem_addr[i]; - d_end = d_start + fw_dump.boot_mem_sz[i]; - - ret = is_fadump_mem_area_contiguous(d_start, d_end); - if (!ret) - break; - } - - return ret; -} - -/* * Returns true, if there are no holes in reserved memory area, * false otherwise. */ @@ -268,10 +290,8 @@ static void __init fadump_show_config(void) if (!fw_dump.fadump_supported) return; - pr_debug("Fadump enabled : %s\n", - (fw_dump.fadump_enabled ? "yes" : "no")); - pr_debug("Dump Active : %s\n", - (fw_dump.dump_active ? "yes" : "no")); + pr_debug("Fadump enabled : %s\n", str_yes_no(fw_dump.fadump_enabled)); + pr_debug("Dump Active : %s\n", str_yes_no(fw_dump.dump_active)); pr_debug("Dump section sizes:\n"); pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); @@ -313,7 +333,7 @@ static __init u64 fadump_calculate_reserve_size(void) * memory at a predefined offset. 
*/ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &size, &base); + &size, &base, NULL, NULL, NULL); if (ret == 0 && size > 0) { unsigned long max_size; @@ -373,12 +393,6 @@ static unsigned long __init get_fadump_area_size(void) size = PAGE_ALIGN(size); size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); - size += sizeof(struct elfhdr); /* ELF core header.*/ - size += sizeof(struct elf_phdr); /* place holder for cpu notes */ - /* Program headers for crash memory regions. */ - size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); - - size = PAGE_ALIGN(size); /* This is to hold kernel metadata on platforms that support it */ size += (fw_dump.ops->fadump_get_metadata_size ? @@ -389,10 +403,11 @@ static unsigned long __init get_fadump_area_size(void) static int __init add_boot_mem_region(unsigned long rstart, unsigned long rsize) { + int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns(); int i = fw_dump.boot_mem_regs_cnt++; - if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) { - fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS; + if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) { + fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns; return 0; } @@ -552,13 +567,6 @@ int __init fadump_reserve_mem(void) if (!fw_dump.dump_active) { fw_dump.boot_memory_size = PAGE_ALIGN(fadump_calculate_reserve_size()); -#ifdef CONFIG_CMA - if (!fw_dump.nocma) { - fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, - CMA_MIN_ALIGNMENT_BYTES); - } -#endif bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); if (fw_dump.boot_memory_size < bootmem_min) { @@ -573,22 +581,6 @@ } } - /* - * Calculate the memory boundary. - * If memory_limit is less than actual memory boundary then reserve - * the memory for fadump beyond the memory_limit and adjust the - * memory_limit accordingly, so that the running kernel can run with - * specified memory_limit. - */ - if (memory_limit && memory_limit < memblock_end_of_DRAM()) { - size = get_fadump_area_size(); - if ((memory_limit + size) < memblock_end_of_DRAM()) - memory_limit += size; - else - memory_limit = memblock_end_of_DRAM(); - printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" - " dump, now %#016llx\n", memory_limit); - } if (memory_limit) mem_boundary = memory_limit; else @@ -647,13 +639,12 @@ int __init fadump_reserve_mem(void) pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n", (size >> 20), base, (memblock_phys_mem_size() >> 20)); - - ret = fadump_cma_init(); } return ret; error_out: fw_dump.fadump_enabled = 0; + fw_dump.reserve_dump_area_size = 0; return 0; } @@ -704,7 +695,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * old_cpu == -1 means this is the first CPU which has come here, * go ahead and trigger fadump. * - * old_cpu != -1 means some other CPU has already on it's way + * old_cpu != -1 means some other CPU is already on its way * to trigger fadump, just keep looping here. */ this_cpu = smp_processor_id(); @@ -759,7 +750,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) * prstatus.pr_pid = ????
*/ elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, + buf = append_elf_note(buf, NN_PRSTATUS, NT_PRSTATUS, &prstatus, sizeof(prstatus)); return buf; } @@ -930,36 +921,6 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, return 0; } -static int fadump_exclude_reserved_area(u64 start, u64 end) -{ - u64 ra_start, ra_end; - int ret = 0; - - ra_start = fw_dump.reserve_dump_area_start; - ra_end = ra_start + fw_dump.reserve_dump_area_size; - - if ((ra_start < end) && (ra_end > start)) { - if ((start < ra_start) && (end > ra_end)) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - if (ret) - return ret; - - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } else if (start < ra_start) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - } else if (ra_end < end) { - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } - } else - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - - return ret; -} - static int fadump_init_elfcore_header(char *bufp) { struct elfhdr *elf; @@ -997,52 +958,6 @@ static int fadump_init_elfcore_header(char *bufp) } /* - * Traverse through memblock structure and setup crash memory ranges. These - * ranges will be used create PT_LOAD program headers in elfcore header. - */ -static int fadump_setup_crash_memory_ranges(void) -{ - u64 i, start, end; - int ret; - - pr_debug("Setup crash memory ranges.\n"); - crash_mrange_info.mem_range_cnt = 0; - - /* - * Boot memory region(s) registered with firmware are moved to - * different location at the time of crash. Create separate program - * header(s) for this memory chunk(s) with the correct offset. - */ - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - start = fw_dump.boot_mem_addr[i]; - end = start + fw_dump.boot_mem_sz[i]; - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - if (ret) - return ret; - } - - for_each_mem_range(i, &start, &end) { - /* - * skip the memory chunk that is already added - * (0 through boot_memory_top). - */ - if (start < fw_dump.boot_mem_top) { - if (end > fw_dump.boot_mem_top) - start = fw_dump.boot_mem_top; - else - continue; - } - - /* add this range excluding the reserved dump area. */ - ret = fadump_exclude_reserved_area(start, end); - if (ret) - return ret; - } - - return 0; -} - -/* * If the given physical address falls within the boot memory region then * return the relocated address that points to the dump region reserved * for saving initial boot memory contents. 
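The hunk below replaces the old two-pass scheme (fadump_setup_crash_memory_ranges() plus fadump_create_elfcore_headers()) with a single fadump_populate_elfcorehdr() that emits PT_LOAD headers directly, carving the reserved dump area out of each memblock range inline. A stand-alone C sketch of just that overlap handling (function and variable names are illustrative, not the kernel's; a range swallowed whole by the reserved area is simplified to produce no header):

#include <stdint.h>
#include <stdio.h>

static void emit_pt_load(uint64_t s, uint64_t e)
{
	printf("PT_LOAD [%#llx, %#llx)\n",
	       (unsigned long long)s, (unsigned long long)e);
}

/* Emit one or two PT_LOAD headers for [mstart, mend), skipping the
 * part that overlaps the reserved dump area [ra_start, ra_end). */
static void add_range(uint64_t mstart, uint64_t mend,
		      uint64_t ra_start, uint64_t ra_end)
{
	if (ra_start < mend && ra_end > mstart) {
		if (mstart < ra_start && mend > ra_end) {
			emit_pt_load(mstart, ra_start);	/* below the hole */
			emit_pt_load(ra_end, mend);	/* above the hole */
		} else if (mstart < ra_start) {
			emit_pt_load(mstart, ra_start);
		} else if (ra_end < mend) {
			emit_pt_load(ra_end, mend);
		}
	} else {
		emit_pt_load(mstart, mend);	/* no overlap at all */
	}
}

int main(void)
{
	/* Reserved area [0x2000, 0x3000) punches a hole in the range. */
	add_range(0x1000, 0x9000, 0x2000, 0x3000);
	return 0;
}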
@@ -1072,36 +987,50 @@ static inline unsigned long fadump_relocate(unsigned long paddr) return raddr; } -static int fadump_create_elfcore_headers(char *bufp) +static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, + u64 size, unsigned long long offset) { - unsigned long long raddr, offset; - struct elf_phdr *phdr; + phdr->p_align = 0; + phdr->p_memsz = size; + phdr->p_filesz = size; + phdr->p_paddr = start; + phdr->p_offset = offset; + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (unsigned long)__va(start); +} + +static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh) +{ + char *bufp; struct elfhdr *elf; - int i, j; + struct elf_phdr *phdr; + u64 boot_mem_dest_offset; + unsigned long long i, ra_start, ra_end, ra_size, mstart, mend; + bufp = (char *) fw_dump.elfcorehdr_addr; fadump_init_elfcore_header(bufp); elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* - * setup ELF PT_NOTE, place holder for cpu notes info. The notes info - * will be populated during second kernel boot after crash. Hence - * this PT_NOTE will always be the first elf note. + * Set up ELF PT_NOTE, a placeholder for CPU notes information. + * The notes info will be populated later by platform-specific code. + * Hence, this PT_NOTE will always be the first ELF note. * * NOTE: Any new ELF note addition should be placed after this note. */ phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); phdr->p_type = PT_NOTE; - phdr->p_flags = 0; - phdr->p_vaddr = 0; - phdr->p_align = 0; - - phdr->p_offset = 0; - phdr->p_paddr = 0; - phdr->p_filesz = 0; - phdr->p_memsz = 0; - + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + /* Increment number of program headers. */ (elf->e_phnum)++; /* setup ELF PT_NOTE for vmcoreinfo */ @@ -1111,55 +1040,66 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_flags = 0; phdr->p_vaddr = 0; phdr->p_align = 0; - - phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); - phdr->p_offset = phdr->p_paddr; - phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; - + phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr; + phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size; /* Increment number of program headers. */ (elf->e_phnum)++; - /* setup PT_LOAD sections. */ - j = 0; - offset = 0; - raddr = fw_dump.boot_mem_addr[0]; - for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) { - u64 mbase, msize; - - mbase = crash_mrange_info.mem_ranges[i].base; - msize = crash_mrange_info.mem_ranges[i].size; - if (!msize) - continue; - + /* + * Set up PT_LOAD sections: first include the boot memory regions, + * then add the rest of the memory regions. + */ + boot_mem_dest_offset = fw_dump.boot_mem_dest_addr; + for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = mbase; - - if (mbase == raddr) { - /* - * The entire real memory region will be moved by - * firmware to the specified destination_address. - * Hence set the correct offset. - */ - phdr->p_offset = fw_dump.boot_mem_dest_addr + offset; - if (j < (fw_dump.boot_mem_regs_cnt - 1)) { - offset += fw_dump.boot_mem_sz[j]; - raddr = fw_dump.boot_mem_addr[++j]; - } + populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i], + fw_dump.boot_mem_sz[i], + boot_mem_dest_offset); + /* Increment number of program headers.
*/ + (elf->e_phnum)++; + boot_mem_dest_offset += fw_dump.boot_mem_sz[i]; + } + + /* Memory reserved for fadump in the first kernel */ + ra_start = fw_dump.reserve_dump_area_start; + ra_size = get_fadump_area_size(); + ra_end = ra_start + ra_size; + + phdr = (struct elf_phdr *)bufp; + for_each_mem_range(i, &mstart, &mend) { + /* Boot memory regions already added, skip them now */ + if (mstart < fw_dump.boot_mem_top) { + if (mend > fw_dump.boot_mem_top) + mstart = fw_dump.boot_mem_top; + else + continue; } - phdr->p_paddr = mbase; - phdr->p_vaddr = (unsigned long)__va(mbase); - phdr->p_filesz = msize; - phdr->p_memsz = msize; - phdr->p_align = 0; + /* Handle memblock regions that overlap with the fadump reserved area */ + if ((ra_start < mend) && (ra_end > mstart)) { + if ((mstart < ra_start) && (mend > ra_end)) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + /* Increment number of program headers. */ + (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *)bufp; + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } else if (mstart < ra_start) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + } else if (ra_end < mend) { + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } + } else { + /* No overlap with fadump reserved memory region */ + populate_elf_pt_load(phdr, mstart, mend - mstart, mstart); + } /* Increment number of program headers. */ (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *) bufp; } - return 0; } static unsigned long init_fadump_header(unsigned long addr) @@ -1174,14 +1114,25 @@ memset(fdh, 0, sizeof(struct fadump_crash_info_header)); fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; - fdh->elfcorehdr_addr = addr; + fdh->version = FADUMP_HEADER_VERSION; /* We will set the crashing cpu id in crash_fadump() during crash. */ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; + + /* + * The physical address and size of vmcoreinfo are required in the + * second kernel to prepare elfcorehdr. + */ + fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note()); + fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE; + + + fdh->pt_regs_sz = sizeof(struct pt_regs); /* * When LPAR is terminated by PHYP, ensure all possible CPUs' * register data is processed while exporting the vmcore. */ fdh->cpu_mask = *cpu_possible_mask; + fdh->cpu_mask_sz = sizeof(struct cpumask); return addr; } @@ -1189,8 +1140,6 @@ static int register_fadump(void) { unsigned long addr; - void *vaddr; - int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1199,18 +1148,10 @@ if (!fw_dump.reserve_dump_area_size) return -ENODEV; - ret = fadump_setup_crash_memory_ranges(); - if (ret) - return ret; - addr = fw_dump.fadumphdr_addr; /* Initialize fadump crash info header. */ addr = init_fadump_header(addr); - vaddr = __va(addr); - - pr_debug("Creating ELF core headers at %#016lx\n", addr); - fadump_create_elfcore_headers(vaddr); /* register the future kernel dump with firmware. */ pr_debug("Registering for firmware-assisted kernel dump...\n"); @@ -1229,7 +1170,6 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered.
*/ fw_dump.ops->fadump_unregister(&fw_dump); - fadump_free_mem_ranges(&crash_mrange_info); } if (fw_dump.ops->fadump_cleanup) @@ -1415,17 +1355,31 @@ static void fadump_release_memory(u64 begin, u64 end) fadump_release_reserved_area(tstart, end); } -static void fadump_invalidate_release_mem(void) +static void fadump_free_elfcorehdr_buf(void) { - mutex_lock(&fadump_mutex); - if (!fw_dump.dump_active) { - mutex_unlock(&fadump_mutex); + if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) return; - } - fadump_cleanup(); - mutex_unlock(&fadump_mutex); + /* + * Before freeing the memory of `elfcorehdr`, reset the global + * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing + * invalid memory. + */ + elfcorehdr_addr = ELFCORE_ADDR_ERR; + fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); + fw_dump.elfcorehdr_addr = 0; + fw_dump.elfcorehdr_size = 0; +} + +static void fadump_invalidate_release_mem(void) +{ + scoped_guard(mutex, &fadump_mutex) { + if (!fw_dump.dump_active) + return; + fadump_cleanup(); + } + fadump_free_elfcorehdr_buf(); fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM()); fadump_free_cpu_notes_buf(); @@ -1483,6 +1437,18 @@ static ssize_t enabled_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } +/* + * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which indicates + * to userspace that fadump re-registration is not required on memory + * hotplug events. + */ +static ssize_t hotplug_ready_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", 1); +} + static ssize_t mem_reserved_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1497,6 +1463,43 @@ static ssize_t registered_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.dump_registered); } +static ssize_t bootargs_append_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area)); +} + +static ssize_t bootargs_append_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + char *params; + + if (!fw_dump.fadump_enabled || fw_dump.dump_active) + return -EPERM; + + if (count >= COMMAND_LINE_SIZE) + return -EINVAL; + + /* + * Fail here instead of handling this scenario with + * some silly workaround in the capture kernel. + */ + if (saved_command_line_len + count >= COMMAND_LINE_SIZE) { + pr_err("Appending parameters exceeds cmdline size!\n"); + return -ENOSPC; + } + + params = __va(fw_dump.param_area); + strscpy_pad(params, buf, COMMAND_LINE_SIZE); + /* Remove newline character at the end.
*/ + if (params[count-1] == '\n') + params[count-1] = '\0'; + + return count; +} + static ssize_t registered_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -1555,11 +1558,14 @@ static struct kobj_attribute release_attr = __ATTR_WO(release_mem); static struct kobj_attribute enable_attr = __ATTR_RO(enabled); static struct kobj_attribute register_attr = __ATTR_RW(registered); static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); +static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready); +static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append); static struct attribute *fadump_attrs[] = { &enable_attr.attr, ®ister_attr.attr, &mem_reserved_attr.attr, + &hotplug_ready_attr.attr, NULL, }; @@ -1577,6 +1583,12 @@ static void __init fadump_init_files(void) return; } + if (fw_dump.param_area) { + rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr); + if (rc) + pr_err("unable to create bootargs_append sysfs file (%d)\n", rc); + } + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, &fadump_region_fops); @@ -1631,6 +1643,150 @@ static void __init fadump_init_files(void) return; } +static int __init fadump_setup_elfcorehdr_buf(void) +{ + int elf_phdr_cnt; + unsigned long elfcorehdr_size; + + /* + * Program header for CPU notes comes first, followed by one for + * vmcoreinfo, and the remaining program headers correspond to + * memory regions. + */ + elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory); + elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr)); + elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size); + + fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size); + if (!fw_dump.elfcorehdr_addr) { + pr_err("Failed to allocate %lu bytes for elfcorehdr\n", + elfcorehdr_size); + return -ENOMEM; + } + fw_dump.elfcorehdr_size = elfcorehdr_size; + return 0; +} + +/* + * Check if the fadump header of crashed kernel is compatible with fadump kernel. + * + * It checks the magic number, endianness, and size of non-primitive type + * members of fadump header to ensure safe dump collection. + */ +static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) +{ + if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { + pr_err("Old magic number, can't process the dump.\n"); + return false; + } + + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) + pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); + else + pr_err("Fadump header is corrupted.\n"); + + return false; + } + + /* + * Dump collection is not safe if the size of non-primitive type members + * of the fadump header do not match between crashed and fadump kernel. 
+ */ + if (fdh->pt_regs_sz != sizeof(struct pt_regs) || + fdh->cpu_mask_sz != sizeof(struct cpumask)) { + pr_err("Fadump header size mismatch.\n"); + return false; + } + + return true; +} + +static void __init fadump_process(void) +{ + struct fadump_crash_info_header *fdh; + + fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr); + if (!fdh) { + pr_err("Crash info header is empty.\n"); + goto err_out; + } + + /* Avoid processing the dump if fadump header isn't compatible */ + if (!is_fadump_header_compatible(fdh)) + goto err_out; + + /* Allocate buffer for elfcorehdr */ + if (fadump_setup_elfcorehdr_buf()) + goto err_out; + + fadump_populate_elfcorehdr(fdh); + + /* Let platform update the CPU notes in elfcorehdr */ + if (fw_dump.ops->fadump_process(&fw_dump) < 0) + goto err_out; + + /* + * elfcorehdr is now ready to be exported. + * + * Set elfcorehdr_addr so that the vmcore module will export the + * elfcorehdr through '/proc/vmcore'. + */ + elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr); + return; + +err_out: + fadump_invalidate_release_mem(); +} + +/* + * Reserve memory to store additional parameters to be passed + * to the fadump/capture kernel. + */ +void __init fadump_setup_param_area(void) +{ + phys_addr_t range_start, range_end; + + if (!fw_dump.param_area_supported || fw_dump.dump_active) + return; + + /* This memory can't be used by PFW or bootloader as it is shared across kernels */ + if (early_radix_enabled()) { + /* + * Anywhere in the upper half should be good enough as all memory + * is accessible in real mode. + */ + range_start = memblock_end_of_DRAM() / 2; + range_end = memblock_end_of_DRAM(); + } else { + /* + * Memory range for passing additional parameters for HASH MMU + * must meet the following conditions: + * 1. The first memory block size must be higher than the + * minimum RMA (MIN_RMA) size. Bootloader can use memory + * up to the RMA size, so that range should be avoided. + * 2. The range should be between MIN_RMA and RMA size (ppc64_rma_size) + * 3. It must not overlap with the fadump reserved area. + */ + if (ppc64_rma_size < MIN_RMA*1024*1024) + return; + + range_start = MIN_RMA * 1024 * 1024; + range_end = min(ppc64_rma_size, fw_dump.boot_mem_top); + } + + fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE, + COMMAND_LINE_SIZE, + range_start, + range_end); + if (!fw_dump.param_area) { + pr_warn("WARNING: Could not set up area to pass additional parameters!\n"); + return; + } + + memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE); +} + /* * Prepare for firmware-assisted dump. */ @@ -1650,12 +1806,7 @@ int __init setup_fadump(void) * saving it to the disk. */ if (fw_dump.dump_active) { - /* - * if dump process fails then invalidate the registration - * and release memory before proceeding for re-registration.
- */ - if (fw_dump.ops->fadump_process(&fw_dump) < 0) - fadump_invalidate_release_mem(); + fadump_process(); } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { @@ -1734,8 +1885,3 @@ static void __init fadump_reserve_crash_area(u64 base) memblock_reserve(mstart, msize); } } - -unsigned long __init arch_reserved_kernel_pages(void) -{ - return memblock_reserved_size() / PAGE_SIZE; -} diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index 20328f72f9f2..8987eee33dc8 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -23,6 +23,8 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) DEFINE_STATIC_KEY_FALSE(kvm_guest); +EXPORT_SYMBOL_GPL(kvm_guest); + int __init check_kvm_guest(void) { struct device_node *hyper_node; diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index f71f2bbd4de6..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -9,6 +9,7 @@ * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). */ +#include <linux/export.h> #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -18,11 +19,19 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index c3286260a7d1..9cba7dbf58dd 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -21,17 +21,9 @@ mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr -#ifdef CONFIG_40x - mfspr r11, SPRN_DEAR -#else mfspr r11, SPRN_DAR -#endif stw r11, DAR(r10) -#ifdef CONFIG_40x - mfspr r11, SPRN_ESR -#else mfspr r11, SPRN_DSISR -#endif stw r11, DSISR(r10) .endif mfspr r11, SPRN_SRR0 @@ -96,9 +88,7 @@ .endif lwz r9, SRR1(r12) lwz r12, SRR0(r12) -#ifdef CONFIG_40x - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ -#elif defined(CONFIG_PPC_8xx) +#ifdef CONFIG_PPC_8xx mtspr SPRN_EID, r2 /* Set MSR_RI */ #else li r10, MSR_KERNEL /* can take exceptions */ @@ -112,7 +102,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r0,GPR0(r1) lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10,r10,STACK_FRAME_REGS_MARKER@l - stw r10,8(r1) + stw r10,STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) @@ -127,7 +117,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S deleted file mode 100644 index 088f500896c7..000000000000 --- a/arch/powerpc/kernel/head_40x.S +++ /dev/null @@ -1,718 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> - * Initial PowerPC version. - * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> - * Rewritten for PReP - * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> - * Low-level exception handers, MMU support, and rewrite. - * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> - * PowerPC 8xx modifications. - * Copyright (c) 1998-1999 TiVo, Inc. - * PowerPC 403GCX modifications. - * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> - * PowerPC 403GCX/405GP modifications. - * Copyright 2000 MontaVista Software Inc. - * PPC405 modifications - * PowerPC 403GCX/405GP modifications. - * Author: MontaVista Software, Inc. - * frank_rowand@mvista.com or source@mvista.com - * debbie_chu@mvista.com - * - * Module name: head_4xx.S - * - * Description: - * Kernel execution entry point code. - */ - -#include <linux/init.h> -#include <linux/pgtable.h> -#include <linux/sizes.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/cputable.h> -#include <asm/thread_info.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> -#include <asm/export.h> - -#include "head_32.h" - -/* As with the other PowerPC ports, it is expected that when code - * execution begins here, the following registers contain valid, yet - * optional, information: - * - * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) - * r4 - Starting address of the init RAM disk - * r5 - Ending address of the init RAM disk - * r6 - Start of kernel command line string (e.g. "mem=96m") - * r7 - End of kernel command line string - * - * This is all going to change RSN when we add bi_recs....... -- Dan - */ - __HEAD -_GLOBAL(_stext); -_GLOBAL(_start); - - mr r31,r3 /* save device tree ptr */ - - /* We have to turn on the MMU right away so we get cache modes - * set correctly. - */ - bl initial_mmu - -/* We now have the lower 16 Meg mapped into TLB entries, and the caches - * ready to work. - */ -turn_on_mmu: - lis r0,MSR_KERNEL@h - ori r0,r0,MSR_KERNEL@l - mtspr SPRN_SRR1,r0 - lis r0,start_here@h - ori r0,r0,start_here@l - mtspr SPRN_SRR0,r0 - rfi /* enables MMU */ - b . /* prevent prefetch past rfi */ - -/* - * This area is used for temporarily saving registers during the - * critical exception prolog. - */ - . = 0xc0 -crit_save: -_GLOBAL(crit_r10) - .space 4 -_GLOBAL(crit_r11) - .space 4 -_GLOBAL(crit_srr0) - .space 4 -_GLOBAL(crit_srr1) - .space 4 -_GLOBAL(crit_r1) - .space 4 -_GLOBAL(crit_dear) - .space 4 -_GLOBAL(crit_esr) - .space 4 - -/* - * Exception prolog for critical exceptions. 
This is a little different - * from the normal exception prolog above since a critical exception - * can potentially occur at any point during normal exception processing. - * Thus we cannot use the same SPRG registers as the normal prolog above. - * Instead we use a couple of words of memory at low physical addresses. - * This is OK since we don't support SMP on these processors. - */ -.macro CRITICAL_EXCEPTION_PROLOG trapno name - stw r10,crit_r10@l(0) /* save two registers to work with */ - stw r11,crit_r11@l(0) - mfspr r10,SPRN_SRR0 - mfspr r11,SPRN_SRR1 - stw r10,crit_srr0@l(0) - stw r11,crit_srr1@l(0) - mfspr r10,SPRN_DEAR - mfspr r11,SPRN_ESR - stw r10,crit_dear@l(0) - stw r11,crit_esr@l(0) - mfcr r10 /* save CR in r10 for now */ - mfspr r11,SPRN_SRR3 /* check whether user or kernel */ - andi. r11,r11,MSR_PR - lis r11,(critirq_ctx-PAGE_OFFSET)@ha - lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11) - beq 1f - /* COMING FROM USER MODE */ - mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ - lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ -1: stw r1,crit_r1@l(0) - addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */ - mtspr SPRN_SRR1, r11 - lis r11, 1f@h - ori r11, r11, 1f@l - mtspr SPRN_SRR0, r11 - rfi - - .text -1: -\name\()_virt: - lwz r11,crit_r1@l(0) - stw r11,GPR1(r1) - stw r11,0(r1) - mr r11,r1 - stw r10,_CCR(r11) /* save various registers */ - stw r12,GPR12(r11) - stw r9,GPR9(r11) - mflr r10 - stw r10,_LINK(r11) - lis r9,PAGE_OFFSET@ha - lwz r10,crit_r10@l(r9) - lwz r12,crit_r11@l(r9) - stw r10,GPR10(r11) - stw r12,GPR11(r11) - lwz r12,crit_dear@l(r9) - lwz r9,crit_esr@l(r9) - stw r12,_DEAR(r11) /* since they may have had stuff */ - stw r9,_ESR(r11) /* exception was taken */ - mfspr r12,SPRN_SRR2 - mfspr r9,SPRN_SRR3 - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ - COMMON_EXCEPTION_PROLOG_END \trapno + 2 -_ASM_NOKPROBE_SYMBOL(\name\()_virt) -.endm - - /* - * State at this point: - * r9 saved in stack frame, now saved SRR3 & ~MSR_WE - * r10 saved in crit_r10 and in stack frame, trashed - * r11 saved in crit_r11 and in stack frame, - * now phys stack/exception frame pointer - * r12 saved in stack frame, now saved SRR2 - * CR saved in stack frame, CR0.EQ = !SRR3.PR - * LR, DEAR, ESR in stack frame - * r1 saved in stack frame, now virt stack/excframe pointer - * r0, r3-r8 saved in stack frame - */ - -/* - * Exception vectors. - */ -#define CRITICAL_EXCEPTION(n, label, hdlr) \ - START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG n label; \ - prepare_transfer_to_handler; \ - bl hdlr; \ - b ret_from_crit_exc - -/* - * 0x0100 - Critical Interrupt Exception - */ - CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) - -/* - * 0x0200 - Machine Check Exception - */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) - -/* - * 0x0300 - Data Storage Exception - * This happens for just a few reasons. U0 set (but we don't do that), - * or zone protection fault (user violation, write to protected page). - * The other Data TLB exceptions bail out to this point - * if they can't resolve the lightweight TLB fault. - */ - START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 - prepare_transfer_to_handler - bl do_page_fault - b interrupt_return - -/* - * 0x0400 - Instruction Storage Exception - * This is caused by a fetch from non-execute or guarded pages. 
- */ - START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG 0x400 InstructionAccess - li r5,0 - stw r5, _ESR(r11) /* Zero ESR */ - stw r12, _DEAR(r11) /* SRR0 as DEAR */ - prepare_transfer_to_handler - bl do_page_fault - b interrupt_return - -/* 0x0500 - External Interrupt Exception */ - EXCEPTION(0x0500, HardwareInterrupt, do_IRQ) - -/* 0x0600 - Alignment Exception */ - START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - prepare_transfer_to_handler - bl alignment_exception - REST_NVGPRS(r1) - b interrupt_return - -/* 0x0700 - Program Exception */ - START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 - prepare_transfer_to_handler - bl program_check_exception - REST_NVGPRS(r1) - b interrupt_return - - EXCEPTION(0x0800, Trap_08, unknown_exception) - EXCEPTION(0x0900, Trap_09, unknown_exception) - EXCEPTION(0x0A00, Trap_0A, unknown_exception) - EXCEPTION(0x0B00, Trap_0B, unknown_exception) - -/* 0x0C00 - System Call Exception */ - START_EXCEPTION(0x0C00, SystemCall) - SYSCALL_ENTRY 0xc00 -/* Trap_0D is commented out to get more space for system call exception */ - -/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */ - EXCEPTION(0x0E00, Trap_0E, unknown_exception) - EXCEPTION(0x0F00, Trap_0F, unknown_exception) - -/* 0x1000 - Programmable Interval Timer (PIT) Exception */ - START_EXCEPTION(0x1000, DecrementerTrap) - b Decrementer - -/* 0x1010 - Fixed Interval Timer (FIT) Exception */ - START_EXCEPTION(0x1010, FITExceptionTrap) - b FITException - -/* 0x1020 - Watchdog Timer (WDT) Exception */ - START_EXCEPTION(0x1020, WDTExceptionTrap) - b WDTException - -/* 0x1100 - Data TLB Miss Exception - * As the name implies, translation is not in the MMU, so search the - * page tables and fix it. The only purpose of this function is to - * load TLB entries from the page table if they exist. - */ - START_EXCEPTION(0x1100, DTLBMiss) - mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r12 - mfspr r9, SPRN_PID - rlwimi r12, r9, 0, 0xff - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -#ifdef CONFIG_PPC_KUAP - rlwinm. r9, r9, 0, 0xff - beq 5f /* Kuap fault */ -#endif -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ - beq 2f /* Bail if no table */ - - rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED - andc. r9, r9, r11 /* Check permission */ - bne 5f - - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ - - /* Create TLB tag. This is the faulting address plus a static - * set of bits. These are size, valid, E, U0. - */ - li r9, 0x00c0 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r11, 2, 22, 24 - beq 5f - - /* Create TLB tag. 
This is the faulting address, plus a static - * set of bits (valid, E, U0) plus the size from the PMD. - */ - ori r9, r9, 0x40 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -5: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - b DataStorage - -/* 0x1200 - Instruction TLB Miss Exception - * Nearly the same as above, except we get our information from different - * registers and bailout to a different point. - */ - START_EXCEPTION(0x1200, ITLBMiss) - mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r12 - mfspr r9, SPRN_PID - rlwimi r12, r9, 0, 0xff - mfspr r10, SPRN_SRR0 /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -#ifdef CONFIG_PPC_KUAP - rlwinm. r9, r9, 0, 0xff - beq 5f /* Kuap fault */ -#endif -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ - beq 2f /* Bail if no table */ - - rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC - andc. r9, r9, r11 /* Check permission */ - bne 5f - - rlwinm r9, r11, 1, _PAGE_RW /* dirty => rw */ - and r9, r9, r11 /* hwwrite = dirty & rw */ - rlwimi r11, r9, 0, _PAGE_RW /* replace rw by hwwrite */ - - /* Create TLB tag. This is the faulting address plus a static - * set of bits. These are size, valid, E, U0. - */ - li r9, 0x00c0 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r11, 2, 22, 24 - beq 5f - - /* Create TLB tag. This is the faulting address, plus a static - * set of bits (valid, E, U0) plus the size from the PMD. - */ - ori r9, r9, 0x40 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -5: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - b InstructionAccess - - EXCEPTION(0x1300, Trap_13, unknown_exception) - EXCEPTION(0x1400, Trap_14, unknown_exception) - EXCEPTION(0x1500, Trap_15, unknown_exception) - EXCEPTION(0x1600, Trap_16, unknown_exception) - EXCEPTION(0x1700, Trap_17, unknown_exception) - EXCEPTION(0x1800, Trap_18, unknown_exception) - EXCEPTION(0x1900, Trap_19, unknown_exception) - EXCEPTION(0x1A00, Trap_1A, unknown_exception) - EXCEPTION(0x1B00, Trap_1B, unknown_exception) - EXCEPTION(0x1C00, Trap_1C, unknown_exception) - EXCEPTION(0x1D00, Trap_1D, unknown_exception) - EXCEPTION(0x1E00, Trap_1E, unknown_exception) - EXCEPTION(0x1F00, Trap_1F, unknown_exception) - -/* Check for a single step debug exception while in an exception - * handler before state has been saved. 
This is to catch the case - * where an instruction that we are trying to single step causes - * an exception (eg ITLB/DTLB miss) and thus the first instruction of - * the exception handler generates a single step debug exception. - * - * If we get a debug trap on the first instruction of an exception handler, - * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is - * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). - * The exception handler was handling a non-critical interrupt, so it will - * save (and later restore) the MSR via SPRN_SRR1, which will still have - * the MSR_DE bit set. - */ - /* 0x2000 - Debug Exception */ - START_EXCEPTION(0x2000, DebugTrap) - CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap - - /* - * If this is a single step or branch-taken exception in an - * exception entry sequence, it was probably meant to apply to - * the code where the exception occurred (since exception entry - * doesn't turn off DE automatically). We simulate the effect - * of turning off DE on entry to an exception handler by turning - * off DE in the SRR3 value and clearing the debug status. - */ - mfspr r10,SPRN_DBSR /* check single-step/branch taken */ - andis. r10,r10,DBSR_IC@h - beq+ 2f - - andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */ - beq 1f /* branch and fix it up */ - - mfspr r10,SPRN_SRR2 /* Faulting instruction address */ - cmplwi r10,0x2100 - bgt+ 2f /* address above exception vectors */ - - /* here it looks like we got an inappropriate debug exception. */ -1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */ - lis r10,DBSR_IC@h /* clear the IC event */ - mtspr SPRN_DBSR,r10 - /* restore state and get out */ - lwz r10,_CCR(r11) - lwz r0,GPR0(r11) - lwz r1,GPR1(r11) - mtcrf 0x80,r10 - mtspr SPRN_SRR2,r12 - mtspr SPRN_SRR3,r9 - lwz r9,GPR9(r11) - lwz r12,GPR12(r11) - lwz r10,crit_r10@l(0) - lwz r11,crit_r11@l(0) - rfci - b . - - /* continue normal handling for a critical exception... */ -2: mfspr r4,SPRN_DBSR - stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ - prepare_transfer_to_handler - bl DebugException - b ret_from_crit_exc - - /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ - __HEAD -Decrementer: - EXCEPTION_PROLOG 0x1000 Decrementer - lis r0,TSR_PIS@h - mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - prepare_transfer_to_handler - bl timer_interrupt - b interrupt_return - - /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ - __HEAD -FITException: - EXCEPTION_PROLOG 0x1010 FITException - prepare_transfer_to_handler - bl unknown_exception - b interrupt_return - - /* Watchdog Timer (WDT) Exception. (from 0x1020) */ - __HEAD -WDTException: - CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException - prepare_transfer_to_handler - bl WatchdogException - b ret_from_crit_exc - -/* Other PowerPC processors, namely those derived from the 6xx-series - * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. - * However, for the 4xx-series processors these are neither defined nor - * reserved. - */ - - __HEAD - /* Damn, I came up one instruction too many to fit into the - * exception space :-). Both the instruction and data TLB - * miss get to this point to load the TLB. - * r10 - TLB_TAG value - * r11 - Linux PTE - * r9 - available to use - * PID - loaded with proper value when we get here - * Upon exit, we reload everything and RFI. - * Actually, it will fit now, but oh well.....a common place - * to load the TLB. 
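
Stripped of register allocation, the DTLB/DataStorage and ITLB/InstructionAccess miss handlers above are one two-level table walk plus a single mask test. A hedged C rendering (the index shifts follow the rlwimi comments; the flag values here are illustrative, not the real header values):

#include <stdbool.h>
#include <stdint.h>

#define _PAGE_PRESENT	0x001		/* values illustrative */
#define _PAGE_ACCESSED	0x020
#define _PMD_PRESENT	0x001

static bool tlb_miss_walk(const uint32_t *pgdir, uint32_t ea,
			  uint32_t required, uint32_t *pte_out)
{
	uint32_t l1 = pgdir[ea >> 22];		/* top 10 EA bits index L1 */

	if (!(l1 & _PMD_PRESENT))
		return false;			/* bail to the heavy handler */

	const uint32_t *pt = (const uint32_t *)(uintptr_t)(l1 & ~0xfffu);
	uint32_t pte = pt[(ea >> 12) & 0x3ff];	/* next 10 bits index the PTE */

	if (required & ~pte)			/* the andc. permission test */
		return false;			/* a required bit is missing */
	*pte_out = pte;
	return true;				/* fall through to the TLB load */
}

As in the li r9 lines above, required is _PAGE_PRESENT | _PAGE_ACCESSED for data accesses, with _PAGE_EXEC added on the instruction side.
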
- */ -tlb_4xx_index: - .long 0 -finish_tlb_load: - /* - * Clear out the software-only bits in the PTE to generate the - * TLB_DATA value. These are the bottom 2 bits of the RPM, the - * top 3 bits of the zone field, and M. - */ - li r9, 0x0ce2 - andc r11, r11, r9 - - /* load the next available TLB index. */ - lwz r9, tlb_4xx_index@l(0) - addi r9, r9, 1 - andi. r9, r9, PPC40X_TLB_SIZE - 1 - stw r9, tlb_4xx_index@l(0) - - tlbwe r11, r9, TLB_DATA /* Load TLB LO */ - tlbwe r10, r9, TLB_TAG /* Load TLB HI */ - - /* Done...restore registers and get out of here. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - rfi /* Should sync shadow TLBs */ - b . /* prevent prefetch past rfi */ - -/* This is where the main kernel code starts. - */ -start_here: - - /* ptr to current */ - lis r2,init_task@h - ori r2,r2,init_task@l - - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG_THREAD,r4 - - /* stack */ - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) - - bl early_init /* We have to do this with MMU on */ - -/* - * Decide what sort of machine this is and initialize the MMU. - */ -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif - li r3,0 - mr r4,r31 - bl machine_init - bl MMU_init - -/* Go back to running unmapped so we can load up new values - * and change to using our exception vectors. - * On the 4xx, all we have to do is invalidate the TLB to clear - * the old 16M byte TLB mappings. - */ - lis r4,2f@h - ori r4,r4,2f@l - tophys(r4,r4) - lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h - ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - rfi - b . /* prevent prefetch past rfi */ - -/* Load up the kernel context */ -2: - sync /* Flush to memory before changing TLB */ - tlbia - isync /* Flush shadow TLBs */ - - /* set up the PTE pointers for the Abatron bdiGDB. - */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(0) /* Must match your Abatron config file */ - tophys(r5,r5) - stw r6, 0(r5) - -/* Now turn on the MMU for real! */ - lis r4,MSR_KERNEL@h - ori r4,r4,MSR_KERNEL@l - lis r3,start_kernel@h - ori r3,r3,start_kernel@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - rfi /* enable MMU and jump to start_kernel */ - b . /* prevent prefetch past rfi */ - -/* Set up the initial MMU state so we can do the first level of - * kernel initialization. This maps the first 32 MBytes of memory 1:1 - * virtual to physical and more importantly sets the cache mode. - */ -initial_mmu: - tlbia /* Invalidate all TLB entries */ - isync - - /* We should still be executing code at physical address 0x0000xxxx - * at this point. However, start_here is at virtual address - * 0xC000xxxx. So, set up a TLB mapping to cover this once - * translation is enabled. - */ - - lis r3,KERNELBASE@h /* Load the kernel virtual address */ - ori r3,r3,KERNELBASE@l - tophys(r4,r3) /* Load the kernel physical address */ - - iccci r0,r3 /* Invalidate the i-cache before use */ - - /* Load the kernel PID. 
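
finish_tlb_load above replaces TLB entries round-robin through the counter word at tlb_4xx_index; the andi. against PPC40X_TLB_SIZE - 1 is the wrap, which only works because the TLB size is a power of two (64 entries on these 40x cores, as far as I can tell). The same logic in C:

static unsigned int tlb_4xx_index;	/* same idea as the .long above */

static unsigned int next_tlb_victim(void)
{
	tlb_4xx_index = (tlb_4xx_index + 1) & (64 - 1);	/* PPC40X_TLB_SIZE */
	return tlb_4xx_index;
}
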
- */ - li r0,0 - mtspr SPRN_PID,r0 - sync - - /* Configure and load one entry into TLB slots 63 */ - clrrwi r4,r4,10 /* Mask off the real page number */ - ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ - - clrrwi r3,r3,10 /* Mask off the effective page number */ - ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M)) - - li r0,63 /* TLB slot 63 */ - - tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ - tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ - - li r0,62 /* TLB slot 62 */ - addis r4,r4,SZ_16M@h - addis r3,r3,SZ_16M@h - tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ - tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ - - isync - - /* Establish the exception vector base - */ - lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */ - tophys(r0,r4) /* Use the physical address */ - mtspr SPRN_EVPR,r0 - - blr - -_GLOBAL(abort) - mfspr r13,SPRN_DBCR0 - oris r13,r13,DBCR0_RST_SYSTEM@h - mtspr SPRN_DBCR0,r13 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f15cb9fdb692..25642e802ed3 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -35,7 +35,6 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> #include <asm/synch.h> -#include <asm/export.h> #include <asm/code-patching-asm.h> #include "head_booke.h" @@ -109,7 +108,7 @@ _GLOBAL(_start); lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) bl early_init @@ -315,8 +314,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -343,7 +342,7 @@ interrupt_base: mtspr SPRN_MMUCR,r12 /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -356,7 +355,7 @@ interrupt_base: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -429,8 +428,8 @@ interrupt_base: * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f + cmplw cr7, r10, r11 + blt+ cr7, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -516,6 +515,7 @@ interrupt_base: * r11 - PTE high word value * r12 - PTE low word value * r13 - TLB index + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -534,11 +534,10 @@ finish_tlb_load_44x: tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */ /* And WS 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? 
no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ @@ -569,8 +568,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -587,7 +586,7 @@ finish_tlb_load_44x: 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write + * do copy ESR:ST to _PAGE_WRITE position as trying to write * to an RO page is pretty common, we don't do it with * _PAGE_DIRTY. We could do it, but it's a fairly rare * event so I'd rather take the overhead when it happens @@ -600,7 +599,7 @@ finish_tlb_load_44x: * place or can we save a couple of instructions here ? */ mfspr r12,SPRN_ESR - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ rlwimi r13,r12,10,30,30 /* Load the PTE */ @@ -670,8 +669,8 @@ finish_tlb_load_44x: * kernel page tables. */ lis r11,PAGE_OFFSET@h - cmplw cr0,r10,r11 - blt+ 3f + cmplw cr7,r10,r11 + blt+ cr7,3f lis r11,swapper_pg_dir@h ori r11,r11, swapper_pg_dir@l li r12,0 /* MMUCR = 0 */ @@ -745,6 +744,7 @@ finish_tlb_load_44x: * r11 - PTE high word value * r12 - PTE low word value * r13 - free to use + * cr7 - Result of comparison with PAGE_OFFSET * MMUCR - loaded with proper value when we get here * Upon exit, we reload everything and RFI. */ @@ -754,11 +754,10 @@ finish_tlb_load_47x: tlbwe r11,r13,1 /* And make up word 2 */ - li r10,0xf85 /* Mask to apply from PTE */ - rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */ + li r10,0xf84 /* Mask to apply from PTE */ + rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */ and r11,r12,r10 /* Mask PTE bits to keep */ - andi. r10,r12,_PAGE_USER /* User page ? */ - beq 1f /* nope, leave U bits empty */ + bge cr7,1f /* User page ? no, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 1: tlbwe r11,r13,2 @@ -1012,7 +1011,7 @@ _GLOBAL(start_secondary_47x) */ lis r1,temp_boot_stack@h ori r1,r1,temp_boot_stack@l - addi r1,r1,1024-STACK_FRAME_OVERHEAD + addi r1,r1,1024-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) bl mmu_init_secondary @@ -1025,7 +1024,7 @@ _GLOBAL(start_secondary_47x) lwz r1,TASK_STACK(r2) /* Current stack pointer */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index d3eea633d11a..63432a33ec49 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -18,6 +18,7 @@ * variants. */ +#include <linux/linkage.h> #include <linux/threads.h> #include <linux/init.h> #include <asm/reg.h> @@ -39,7 +40,6 @@ #include <asm/hw_irq.h> #include <asm/cputhreads.h> #include <asm/ppc-opcode.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #ifdef CONFIG_PPC_BOOK3S #include <asm/exception-64s.h> @@ -75,6 +75,13 @@ * 2. The kernel is entered at __start */ +/* + * boot_from_prom and prom_init run at the physical address. Everything + * after prom and kexec entry run at the virtual address (PAGE_OFFSET). + * Secondaries run at the virtual address from generic_secondary_common_init + * onward. 
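
The 44x hunks above trade the old per-PTE _PAGE_USER probe for the kernel/user verdict that the PAGE_OFFSET compare already left in cr7, and widen the 0xf84 insert so that READ as well as DIRTY lands in the supervisor permission bits. A rough C restatement of the word-2 derivation, with all bit positions illustrative rather than taken from the headers:

#include <stdbool.h>
#include <stdint.h>

/* sketch of the WS2 attribute derivation in finish_tlb_load_44x */
static uint32_t tlb_word2(uint32_t pte, bool is_user /* cr7: ea < PAGE_OFFSET */)
{
	uint32_t mask = 0xf84 | ((pte >> 3) & 0x3);	/* DIRTY,READ -> SW,SR */
	uint32_t a = pte & mask;

	if (is_user) {
		a |= (a & 0x7) << 3;	/* copy SX/SW/SR up into UX/UW/UR */
		a &= ~0x4u;		/* but never SX on a user page */
	}
	return a;
}
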
+ */ + OPEN_FIXED_SECTION(first_256B, 0x0, 0x100) USE_FIXED_SECTION(first_256B) /* @@ -143,7 +150,7 @@ DEFINE_FIXED_SYMBOL(__run_at_load, first_256B) .globl __secondary_hold __secondary_hold: FIXUP_ENDIAN -#ifndef CONFIG_PPC_BOOK3E +#ifndef CONFIG_PPC_BOOK3E_64 mfmsr r24 ori r24,r24,MSR_RI mtmsrd r24 /* RI on */ @@ -159,17 +166,13 @@ __secondary_hold: std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) sync - li r26,0 -#ifdef CONFIG_PPC_BOOK3E - tovirt(r26,r26) -#endif /* All secondary cpus wait here until told to start. */ -100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(0) cmpdi 0,r12,0 beq 100b #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 tovirt(r12,r12) #endif mtctr r12 @@ -178,7 +181,7 @@ __secondary_hold: * it may be the case that other platforms have r4 right to * begin with, this gives us some safety in case it is not */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 mr r4,r25 #else li r4,0 @@ -192,13 +195,6 @@ __secondary_hold: #endif CLOSE_FIXED_SECTION(first_256B) -/* This value is used to mark exception frames on the stack. */ - .section ".toc","aw" -/* This value is used to mark exception frames on the stack. */ -exception_marker: - .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER - .previous - /* * On server, we include the exception vectors code here as it * relies on absolute addressing which is only possible within @@ -214,7 +210,7 @@ USE_TEXT_SECTION() #include "interrupt_64.S" -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* * The booting_thread_hwid holds the thread id we want to boot in cpu * hotplug case. It is set by cpu hotplug code, and is invalid by default. @@ -313,16 +309,14 @@ _GLOBAL(fsl_secondary_thread_init) /* turn on 64-bit mode */ bl enable_64b_mode - /* get a valid TOC pointer, wherever we're mapped at */ - bl relative_toc - tovirt(r2,r2) - /* Book3E initialization */ mr r3,r24 bl book3e_secondary_thread_init + bl relative_toc + b generic_secondary_common_init -#endif /* CONFIG_PPC_BOOK3E */ +#endif /* CONFIG_PPC_BOOK3E_64 */ /* * On pSeries and most other platforms, secondary processors spin @@ -335,22 +329,24 @@ _GLOBAL(fsl_secondary_thread_init) */ _GLOBAL(generic_secondary_smp_init) FIXUP_ENDIAN + + li r13,0 + + /* Poison TOC */ + li r2,-1 + mr r24,r3 mr r25,r4 /* turn on 64-bit mode */ bl enable_64b_mode - /* get a valid TOC pointer, wherever we're mapped at */ - bl relative_toc - tovirt(r2,r2) - -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* Book3E initialization */ mr r3,r24 mr r4,r25 bl book3e_secondary_core_init - + /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */ /* * After common core init has finished, check if the current thread is the * one we wanted to boot. 
If not, start the specified thread and stop the @@ -378,8 +374,7 @@ _GLOBAL(generic_secondary_smp_init) beq 20f /* start the specified thread */ - LOAD_REG_ADDR(r5, fsl_secondary_thread_init) - ld r4, 0(r5) + LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init)) bl book3e_start_thread /* stop the current thread */ @@ -388,6 +383,16 @@ _GLOBAL(generic_secondary_smp_init) 10: b 10b 20: +#else + /* Now the MMU is off, can branch to our PAGE_OFFSET address */ + bcl 20,31,$+4 +1: mflr r11 + addi r11,r11,(2f - 1b) + tovirt(r11, r11) + mtctr r11 + bctr +2: + bl relative_toc #endif generic_secondary_common_init: @@ -400,8 +405,12 @@ generic_secondary_common_init: #else LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointe */ ld r8,0(r8) /* Get base vaddr of array */ +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) + LOAD_REG_IMMEDIATE(r7, NR_CPUS) +#else LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ lwz r7,0(r7) /* also the max paca allocated */ +#endif li r5,0 /* logical cpu id */ 1: sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */ @@ -417,7 +426,7 @@ generic_secondary_common_init: b kexec_wait /* next kernel might do better */ 2: SET_PACA(r13) -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */ mtspr SPRN_SPRG_TLB_EXFRAME,r12 #endif @@ -427,7 +436,7 @@ generic_secondary_common_init: /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE /* See if we need to call a cpu state restore handler */ LOAD_REG_ADDR(r23, cur_cpu_spec) @@ -465,7 +474,7 @@ generic_secondary_common_init: * Assumes we're mapped EA == RA if the MMU is on. */ #ifdef CONFIG_PPC_BOOK3S -__mmu_off: +SYM_FUNC_START_LOCAL(__mmu_off) mfmsr r3 andi. r0,r3,MSR_IR|MSR_DR beqlr @@ -476,8 +485,34 @@ __mmu_off: sync rfid b . /* prevent speculative execution */ -#endif +SYM_FUNC_END(__mmu_off) +SYM_FUNC_START_LOCAL(start_initialization_book3s) + mflr r25 + + /* Setup some critical 970 SPRs before switching MMU off */ + mfspr r0,SPRN_PVR + srwi r0,r0,16 + cmpwi r0,0x39 /* 970 */ + beq 1f + cmpwi r0,0x3c /* 970FX */ + beq 1f + cmpwi r0,0x44 /* 970MP */ + beq 1f + cmpwi r0,0x45 /* 970GX */ + bne 2f +1: bl __cpu_preinit_ppc970 +2: + + /* Switch off MMU if not already off */ + bl __mmu_off + + /* Now the MMU is off, can return to our PAGE_OFFSET address */ + tovirt(r25,r25) + mtlr r25 + blr +SYM_FUNC_END(start_initialization_book3s) +#endif /* * Here is our main kernel entry point. We support currently 2 kind of entries @@ -494,14 +529,11 @@ __start_initialization_multiplatform: /* Make sure we are running in 64 bits mode */ bl enable_64b_mode - /* Get TOC pointer (current runtime address) */ - bl relative_toc + /* Zero r13 (paca) so early program check / mce don't use it */ + li r13,0 - /* find out where we are now */ - bcl 20,31,$+4 -0: mflr r26 /* r26 = runtime addr here */ - addis r26,r26,(_stext - 0b)@ha - addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ + /* Poison TOC */ + li r2,-1 /* * Are we booted from a PROM Of-type client-interface ? 
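
For reference, the hold loop the secondaries sit in (the 100: ld/cmpdi/beq sequence above) is a plain spin on one published word. Ignoring the endian fixup and TOC handling, it would look roughly like this in C:

#include <stdatomic.h>

extern _Atomic unsigned long __secondary_hold_spinloop;

static _Noreturn void secondary_hold(unsigned long cpu)
{
	unsigned long entry;

	/* all secondary cpus wait here until told to start */
	while (!(entry = atomic_load(&__secondary_hold_spinloop)))
		;
	((void (*)(unsigned long))entry)(cpu);	/* the mtctr/bctr jump */
	__builtin_unreachable();
}
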
@@ -519,32 +551,41 @@ __start_initialization_multiplatform: mr r29,r9 #endif -#ifdef CONFIG_PPC_BOOK3E + /* Get TOC pointer (current runtime address) */ + bl relative_toc + + /* These functions return to the virtual (PAGE_OFFSET) address */ +#ifdef CONFIG_PPC_BOOK3E_64 bl start_initialization_book3e - b __after_prom_start #else - /* Setup some critical 970 SPRs before switching MMU off */ - mfspr r0,SPRN_PVR - srwi r0,r0,16 - cmpwi r0,0x39 /* 970 */ - beq 1f - cmpwi r0,0x3c /* 970FX */ - beq 1f - cmpwi r0,0x44 /* 970MP */ - beq 1f - cmpwi r0,0x45 /* 970GX */ - bne 2f -1: bl __cpu_preinit_ppc970 -2: + bl start_initialization_book3s +#endif /* CONFIG_PPC_BOOK3E_64 */ + + /* Get TOC pointer, virtual */ + bl relative_toc + + /* find out where we are now */ + + /* OPAL doesn't pass base address in r4, have to derive it. */ + bcl 20,31,$+4 +0: mflr r26 /* r26 = runtime addr here */ + addis r26,r26,(_stext - 0b)@ha + addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ - /* Switch off MMU if not already off */ - bl __mmu_off b __after_prom_start -#endif /* CONFIG_PPC_BOOK3E */ __REF __boot_from_prom: #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE + /* Get TOC pointer, non-virtual */ + bl relative_toc + + /* find out where we are now */ + bcl 20,31,$+4 +0: mflr r26 /* r26 = runtime addr here */ + addis r26,r26,(_stext - 0b)@ha + addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ + /* Save parameters */ mr r31,r3 mr r30,r4 @@ -574,7 +615,7 @@ __boot_from_prom: /* Do all of the interaction with OF client interface */ mr r8,r26 - bl prom_init + bl CFUNC(prom_init) #endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */ /* We never return. We also hit that trap if trying to boot @@ -585,21 +626,14 @@ __boot_from_prom: __after_prom_start: #ifdef CONFIG_RELOCATABLE /* process relocations for the final address of the kernel */ - lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ - sldi r25,r25,32 -#if defined(CONFIG_PPC_BOOK3E) - tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ -#endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) -#if defined(CONFIG_PPC_BOOK3E) - tophys(r26,r26) -#endif cmplwi cr0,r7,1 /* flagged to stay where we are ? */ - bne 1f - add r25,r25,r26 + mr r25,r26 /* then use current kernel base */ + beq 1f + LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */ 1: mr r3,r25 bl relocate -#if defined(CONFIG_PPC_BOOK3E) +#if defined(CONFIG_PPC_BOOK3E_64) /* IVPR needs to be set after relocation. */ bl init_core_book3e #endif @@ -612,15 +646,10 @@ __after_prom_start: * * Note: This process overwrites the OF exception vectors. */ - li r3,0 /* target addr */ -#ifdef CONFIG_PPC_BOOK3E - tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */ -#endif - mr. r4,r26 /* In some cases the loader may */ -#if defined(CONFIG_PPC_BOOK3E) - tovirt(r4,r4) -#endif - beq 9f /* have already put us at zero */ + LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) + mr r4,r26 /* Load the virtual source address into r4 */ + cmpld r3,r4 /* Check if source == dest */ + beq 9f /* If so skip the copy */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. 
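
The relocation decision rewritten in __after_prom_start above reduces to a two-way pick before calling relocate(); as a C sketch, where run_at_load and runtime_base stand for r7 and r26:

extern void relocate(unsigned long dest);
#define PAGE_OFFSET 0xc000000000000000ul	/* ppc64 virtual base */

static void choose_reloc_base(unsigned long run_at_load,
			      unsigned long runtime_base)
{
	unsigned long dest = (run_at_load == 1)
		? runtime_base	/* flagged to stay where the loader put us */
		: PAGE_OFFSET;	/* otherwise the static virtual base */
	relocate(dest);		/* bl relocate with r3 = dest */
}
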
*/ @@ -630,14 +659,11 @@ __after_prom_start: * variable __run_at_load, if it is set the kernel is treated as relocatable * kernel, otherwise it will be moved to PHYSICAL_START */ -#if defined(CONFIG_PPC_BOOK3E) - tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ -#endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) cmplwi cr0,r7,1 bne 3f -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 LOAD_REG_ADDR(r5, __end_interrupts) LOAD_REG_ADDR(r11, _stext) sub r5,r5,r11 @@ -751,9 +777,15 @@ _GLOBAL(pmac_secondary_start) sync slbia - /* get TOC pointer (real address) */ + /* Branch to our PAGE_OFFSET address */ + bcl 20,31,$+4 +1: mflr r11 + addi r11,r11,(2f - 1b) + tovirt(r11, r11) + mtctr r11 + bctr +2: bl relative_toc - tovirt(r2,r2) /* Copy some CPU settings from CPU 0 */ bl __restore_cpu_ppc970 @@ -780,7 +812,7 @@ _GLOBAL(pmac_secondary_start) /* Create a temp kernel stack for use before relocation is on. */ ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + subi r1,r1,STACK_FRAME_MIN_SIZE b __secondary_start @@ -812,7 +844,7 @@ __secondary_start: * can turn it on below. This is a call to C, which is OK, we're still * running on the emergency stack. */ - bl early_setup_secondary + bl CFUNC(early_setup_secondary) /* * The primary has initialized our kernel stack for us in the paca, grab @@ -848,10 +880,10 @@ __secondary_start: * before going into C code. */ start_secondary_prolog: - ld r2,PACATOC(r13) + LOAD_PACA_TOC() li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary + bl CFUNC(start_secondary) b . /* * Reset stack pointer and call start_secondary @@ -862,25 +894,26 @@ _GLOBAL(start_secondary_resume) ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary + bl CFUNC(start_secondary) b . #endif /* * This subroutine clobbers r11 and r12 */ -enable_64b_mode: +SYM_FUNC_START_LOCAL(enable_64b_mode) mfmsr r11 /* grab the current MSR */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */ mtmsr r11 -#else /* CONFIG_PPC_BOOK3E */ +#else /* CONFIG_PPC_BOOK3E_64 */ LOAD_REG_IMMEDIATE(r12, MSR_64BIT) or r11,r11,r12 mtmsrd r11 isync #endif blr +SYM_FUNC_END(enable_64b_mode) /* * This puts the TOC pointer into r2, offset by 0x8000 (as expected @@ -891,10 +924,15 @@ enable_64b_mode: * TOC in -mcmodel=medium mode. After we relocate to 0 but before * the MMU is on we need our TOC to be a virtual address otherwise * these pointers will be real addresses which may get stored and - * accessed later with the MMU on. We use tovirt() at the call - * sites to handle this. + * accessed later with the MMU on. We branch to the virtual address + * while still in real mode then call relative_toc again to handle + * this. */ _GLOBAL(relative_toc) +#ifdef CONFIG_PPC_KERNEL_PCREL + tdnei r2,-1 + blr +#else mflr r0 bcl 20,31,$+4 0: mflr r11 @@ -905,15 +943,15 @@ _GLOBAL(relative_toc) .balign 8 p_toc: .8byte .TOC. - 0b +#endif /* * This is where the main kernel code starts. */ __REF start_here_multiplatform: - /* set up the TOC */ - bl relative_toc - tovirt(r2,r2) + /* Adjust TOC for moved kernel. Could adjust when moving it instead. */ + bl relative_toc /* Clear out the BSS. 
It may have been done in prom_init, * already but that's irrelevant since prom_init will soon @@ -940,7 +978,7 @@ start_here_multiplatform: std r29,8(r11); #endif -#ifndef CONFIG_PPC_BOOK3E +#ifndef CONFIG_PPC_BOOK3E_64 mfmsr r6 ori r6,r6,MSR_RI mtmsrd r6 /* RI on */ @@ -958,13 +996,16 @@ start_here_multiplatform: LOAD_REG_IMMEDIATE(r1,THREAD_SIZE) add r1,r3,r1 li r0,0 - stdu r0,-STACK_FRAME_OVERHEAD(r1) + stdu r0,-STACK_FRAME_MIN_SIZE(r1) /* * Do very early kernel initializations, including initial hash table * and SLB setup before we turn on relocation. */ +#ifdef CONFIG_KASAN + bl CFUNC(kasan_early_init) +#endif /* Restore parameters passed from prom_init/kexec */ mr r3,r31 LOAD_REG_ADDR(r12, DOTSYM(early_setup)) @@ -985,7 +1026,7 @@ start_here_common: std r1,PACAKSAVE(r13) /* Load the TOC (virtual address) */ - ld r2,PACATOC(r13) + LOAD_PACA_TOC() /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) @@ -996,7 +1037,7 @@ start_here_common: stb r0,PACAIRQHAPPENED(r13) /* Generic kernel entry */ - bl start_kernel + bl CFUNC(start_kernel) /* Not reached */ 0: trap diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_85xx.S index f0db4f52bc00..f9a73fae6464 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -29,6 +29,8 @@ #include <linux/init.h> #include <linux/threads.h> #include <linux/pgtable.h> +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -38,7 +40,6 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #include "head_booke.h" @@ -129,7 +130,7 @@ _GLOBAL(_start); /* * For the second relocation, we already set the right tlb entries - * for the kernel space, so skip the code in fsl_booke_entry_mapping.S + * for the kernel space, so skip the code in 85xx_entry_mapping.S */ cmpwi r19,1 beq set_ivor @@ -159,7 +160,7 @@ _GLOBAL(__early_start) lwz r20,0(r20) #define ENTRY_MAPPING_BOOT_SETUP -#include "fsl_booke_entry_mapping.S" +#include "85xx_entry_mapping.S" #undef ENTRY_MAPPING_BOOT_SETUP set_ivor: @@ -229,7 +230,7 @@ set_ivor: lis r1,init_thread_union@h ori r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) #ifdef CONFIG_SMP stw r24, TASK_CPU(r2) @@ -293,9 +294,10 @@ set_ivor: /* Macros to hide the PTE size differences * * FIND_PTE -- walks the page tables given EA & pgdir pointer - * r10 -- EA of fault + * r10 -- free * r11 -- PGDIR pointer * r12 -- free + * r13 -- EA of fault * label 2: is the bailout case * * if we find the pte (fall through): @@ -306,34 +308,34 @@ set_ivor: #ifdef CONFIG_PTE_64BIT #ifdef CONFIG_HUGETLB_PAGE #define FIND_PTE \ - rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ - lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ + add r12, r11, r12; \ + lwz r11, 4(r12); /* Get pgd/pmd entry */ \ + rlwinm. r10, r11, 32 - _PAGE_PSIZE_SHIFT, 0x1e; /* get tsize*/ \ + bne 1000f; /* Huge page (leaf entry) */ \ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ - blt 1000f; /* Normal non-huge page */ \ beq 2f; /* Bail if no table */ \ - oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ - andi. 
r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ - xor r12, r10, r11; /* drop size bits from pointer */ \ - b 1001f; \ -1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ li r10, 0; /* clear r10 */ \ -1001: lwz r11, 4(r12); /* Get pte entry */ + lwz r11, 4(r12); /* Get pte entry */ \ +1000: #else #define FIND_PTE \ - rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ - lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ + add r12, r11, r12; \ + lwz r11, 4(r12); /* Get pgd/pmd entry */ \ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ beq 2f; /* Bail if no table */ \ - rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ #endif /* HUGEPAGE */ #else /* !PTE_64BIT */ #define FIND_PTE \ - rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ + rlwimi r11, r13, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ lwz r11, 0(r11); /* Get L1 entry */ \ rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ beq 2f; /* Bail if no table */ \ - rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \ + rlwimi r12, r13, 22, 20, 29; /* Compute PTE address */ \ lwz r11, 0(r12); /* Get Linux PTE */ #endif @@ -394,7 +396,7 @@ interrupt_base: #ifdef CONFIG_PPC_FPU FP_UNAVAILABLE_EXCEPTION #else - EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, unknown_exception) + EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt) #endif /* System Call Interrupt */ @@ -440,13 +442,13 @@ START_BTB_FLUSH_SECTION BTB_FLUSH(r10) 1: END_BTB_FLUSH_SECTION - mfspr r10, SPRN_DEAR /* Get faulting address */ + mfspr r13, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw 5, r10, r11 + cmplw 5, r13, r11 blt 5, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -469,29 +471,14 @@ END_BTB_FLUSH_SECTION #endif 4: - /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_RW position as trying to write - * to an RO page is pretty common, we don't do it with - * _PAGE_DIRTY. We could do it, but it's a fairly rare - * event so I'd rather take the overhead when it happens - * rather than adding an instruction here. We should measure - * whether the whole thing is worth it in the first place - * as we could avoid loading SPRN_ESR completely in the first - * place... - * - * TODO: Is it worth doing that mfspr & rlwimi in the first - * place or can we save a couple of instructions here ? - */ - mfspr r12,SPRN_ESR + FIND_PTE + #ifdef CONFIG_PTE_64BIT - li r13,_PAGE_PRESENT + li r13,_PAGE_PRESENT|_PAGE_BAP_SR oris r13,r13,_PAGE_ACCESSED@h #else - li r13,_PAGE_PRESENT|_PAGE_ACCESSED + li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED #endif - rlwimi r13,r12,11,29,29 - - FIND_PTE andc. r13,r13,r11 /* Check permission */ #ifdef CONFIG_PTE_64BIT @@ -548,13 +535,13 @@ START_BTB_FLUSH_SECTION 1: END_BTB_FLUSH_SECTION - mfspr r10, SPRN_SRR0 /* Get faulting address */ + mfspr r13, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. 
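
Two things change in FIND_PTE above: the fault EA now lives in r13 so r10 stays free during the walk, and a huge page is recognized directly by a non-zero encoded tsize in the leaf entry rather than through the old PD_HUGE pointer games. A loose C model of the 64-bit-PTE hugetlb variant (the shifts and masks here are stand-ins, not the real values):

#include <stdint.h>

#define PSIZE_SHIFT	7		/* _PAGE_PSIZE_SHIFT, illustrative */

static uint64_t find_pte(const uint32_t *pgdir, uint32_t ea, int *huge)
{
	uint32_t pmd = pgdir[ea >> 21];		/* pgdir/pmd offset from EA */

	if ((pmd >> PSIZE_SHIFT) & 0x1e) {	/* encoded tsize != 0 */
		*huge = 1;
		return pmd;			/* leaf entry: pmd is the pte */
	}
	const uint32_t *pt = (const uint32_t *)(uintptr_t)(pmd & ~0x7ffu);
	if (!pt)
		return 0;			/* bail: no table */
	*huge = 0;
	return pt[(ea >> 12) & 0x1ff];		/* normal pte lookup */
}
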
*/ lis r11, PAGE_OFFSET@h - cmplw 5, r10, r11 + cmplw 5, r13, r11 blt 5, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -563,6 +550,7 @@ END_BTB_FLUSH_SECTION rlwinm r12,r12,0,16,1 mtspr SPRN_MAS1,r12 + FIND_PTE /* Make up the required permissions for kernel code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_SX @@ -583,6 +571,7 @@ END_BTB_FLUSH_SECTION beq 2f /* KUAP fault */ #endif + FIND_PTE /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX @@ -592,7 +581,6 @@ END_BTB_FLUSH_SECTION #endif 4: - FIND_PTE andc. r13,r13,r11 /* Check permission */ #ifdef CONFIG_PTE_64BIT @@ -745,17 +733,12 @@ finish_tlb_load: lwz r15, 0(r14) 100: stw r15, 0(r17) - /* - * Calc MAS1_TSIZE from r10 (which has pshift encoded) - * tlb_enc = (pshift - 10). - */ - subi r15, r10, 10 mfspr r16, SPRN_MAS1 - rlwimi r16, r15, 7, 20, 24 + rlwimi r16, r10, MAS1_TSIZE_SHIFT, MAS1_TSIZE_MASK mtspr SPRN_MAS1, r16 /* copy the pshift for use later */ - mr r14, r10 + addi r14, r10, _PAGE_PSIZE_SHIFT_OFFSET /* fall through */ @@ -782,15 +765,15 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #else - li r10, (_PAGE_EXEC | _PAGE_PRESENT) + li r10, (_PAGE_EXEC | _PAGE_READ) mr r13, r11 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 - andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ + mcrf cr0, cr5 /* Test for user page */ slwi r10, r12, 1 or r10, r10, r12 rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ - iseleq r12, r12, r10 + isellt r12, r10, r12 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ mtspr SPRN_MAS3, r13 #endif @@ -862,7 +845,7 @@ _GLOBAL(load_up_spe) * SPE unavailable trap from kernel - print a message, but let * the task use SPE in the kernel until it returns to user mode. */ -KernelSPE: +SYM_FUNC_START_LOCAL(KernelSPE) lwz r3,_MSR(r1) oris r3,r3,MSR_SPE@h stw r3,_MSR(r1) /* enable use of SPE after return */ @@ -879,13 +862,14 @@ KernelSPE: #endif .align 4,0 +SYM_FUNC_END(KernelSPE) #endif /* CONFIG_SPE */ /* * Translate the effec addr in r3 to phys addr. 
The phys addr will be put * into r3(higher 32bit) and r4(lower 32bit) */ -get_phys_addr: +SYM_FUNC_START_LOCAL(get_phys_addr) mfmsr r8 mfspr r9,SPRN_PID rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */ @@ -907,12 +891,13 @@ get_phys_addr: mfspr r3,SPRN_MAS7 #endif blr +SYM_FUNC_END(get_phys_addr) /* * Global functions */ -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 #ifndef CONFIG_PPC_E500MC /* Adjust or setup IVORs for e500v1/v2 */ _GLOBAL(__setup_e500_ivors) @@ -955,7 +940,7 @@ _GLOBAL(__setup_ehv_ivors) sync blr #endif /* CONFIG_PPC_E500MC */ -#endif /* CONFIG_E500 */ +#endif /* CONFIG_PPC_E500 */ #ifdef CONFIG_SPE /* @@ -972,10 +957,10 @@ _GLOBAL(__giveup_spe) li r4,THREAD_ACC evstddx evr6, r4, r3 /* save off accumulator */ beq 1f - lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lwz r4,_MSR-STACK_INT_FRAME_REGS(r5) lis r3,MSR_SPE@h andc r4,r4,r3 /* disable SPE for previous task */ - stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) + stw r4,_MSR-STACK_INT_FRAME_REGS(r5) 1: blr #endif /* CONFIG_SPE */ @@ -1044,7 +1029,7 @@ __secondary_start: lwz r1,TASK_STACK(r2) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 stw r0,0(r1) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0b05f2be66b9..56c5ebe21b99 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -18,6 +18,8 @@ #include <linux/magic.h> #include <linux/pgtable.h> #include <linux/sizes.h> +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -27,7 +29,6 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/code-patching-asm.h> #include <asm/interrupt.h> @@ -39,16 +40,6 @@ #include "head_32.h" -.macro compare_to_kernel_boundary scratch, addr -#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 -/* By simply checking Address >= 0x80000000, we know if its a kernel address */ - not. \scratch, \addr -#else - rlwinm \scratch, \addr, 16, 0xfff8 - cmpli cr0, \scratch, PAGE_OFFSET@h -#endif -.endm - #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -198,18 +189,7 @@ instruction_counter: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef CONFIG_MODULES - mfcr r11 - compare_to_kernel_boundary r10, r10 -#endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef CONFIG_MODULES - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 -#endif lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 mfspr r10, SPRN_MD_TWC @@ -247,19 +227,12 @@ instruction_counter: START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 - mfcr r11 /* If we are faulting a kernel address, we have to use the * kernel page tables. */ mfspr r10, SPRN_MD_EPN - compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 @@ -331,15 +304,19 @@ instruction_counter: cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. 
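
get_phys_addr above delegates the translation to the MMU itself: it programs MAS6 with the current PID (as SPID) and MSR[DS], runs tlbsx, then reads the RPN back out of MAS3 (and MAS7 for the upper bits on big-phys parts). In pseudo-C, where the helpers below are stand-ins for the corresponding instructions, not real APIs:

#include <stdint.h>

/* stand-ins for the mtspr/mfspr/tlbsx instructions (not real C calls) */
extern void mtspr_mas6(uint32_t v);
extern void tlbsx_ea(uint32_t ea);
extern uint32_t mfspr_mas3(void);
extern uint32_t mfspr_mas7(void);

static uint64_t ea_to_phys(uint32_t ea, uint32_t pid, int msr_ds)
{
	mtspr_mas6((pid << 16) | (msr_ds & 1));	/* SPID + address space */
	tlbsx_ea(ea);				/* ask the MMU to search */

	uint32_t lo = (mfspr_mas3() & ~0xfffu) | (ea & 0xfffu);
	return ((uint64_t)mfspr_mas7() << 32) | lo;	/* hi:lo phys addr */
}
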
*/ DARFixed:/* Return from dcbx instruction bug workaround */ + mfspr r11, SPRN_DSISR + rlwinm r11, r11, 0, DSISR_NOHPTE + cmpwi cr1, r11, 0 + beq+ cr1, .Ldtlbie + mfspr r11, SPRN_DAR + tlbie r11 + rlwinm r11, r11, 16, 0xffff + cmplwi cr1, r11, TASK_SIZE@h + bge- cr1, FixupPGD +.Ldtlbie: EXCEPTION_PROLOG_1 /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1 - lwz r4, _DAR(r11) - lwz r5, _DSISR(r11) - andis. r10,r5,DSISR_NOHPTE@h - beq+ .Ldtlbie - tlbie r4 -.Ldtlbie: prepare_transfer_to_handler bl do_page_fault b interrupt_return @@ -393,6 +370,30 @@ DARFixed:/* Return from dcbx instruction bug workaround */ __HEAD . = 0x2000 +FixupPGD: + mtspr SPRN_M_TW, r10 + mfspr r10, SPRN_DAR + mtspr SPRN_MD_EPN, r10 + mfspr r11, SPRN_M_TWB /* Get level 1 table */ + lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + bne cr1, 1f + + rlwinm r10, r11, 0, 20, 31 + oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha + lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r10) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + beq cr1, 1f + stw r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11) /* Set the level 1 entry */ + mfspr r10, SPRN_M_TW + mtcr r10 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + rfi +1: + mfspr r10, SPRN_M_TW + b .Ldtlbie + /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. * DAR is set to the calculated address. @@ -403,7 +404,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r10, SPRN_SRR0 mtspr SPRN_MD_EPN, r10 rlwinm r11, r10, 16, 0xfff8 - cmpli cr1, r11, PAGE_OFFSET@h + cmpli cr1, r11, TASK_SIZE@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f @@ -414,14 +415,13 @@ FixupDAR:/* Entry point for dcbx workaround. */ oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha 3: lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + rlwinm r11, r11, 0, ~_PMD_PAGE_8M mtspr SPRN_MD_TWC, r11 - mtcrf 0x01, r11 mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ - bt 28,200f /* bit 28 = Large page (8M) */ /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 -201: lwz r11,0(r11) + lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ @@ -440,11 +440,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ 141: mfspr r10,SPRN_M_TW b DARFixed /* Nope, go back to normal TLB processing */ -200: - /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -499,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. 
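
The new FixupPGD path above is a lazy copy: when a kernel address faults and its level-1 slot in the current table is empty, the handler fetches the corresponding swapper_pg_dir entry, stores it into the slot, and retries via rfi; only if the swapper entry is also empty, or the slot was already populated, does it fall through to the real fault path. Roughly, in C (the index computation is simplified):

#include <stdbool.h>
#include <stdint.h>

extern uint32_t swapper_pg_dir[];

static bool fixup_pgd(uint32_t *cur_l1, uint32_t dar)
{
	unsigned int idx = dar >> 22;	/* level-1 index, illustrative */

	if (cur_l1[idx])
		return false;		/* slot populated: genuine fault */
	if (!swapper_pg_dir[idx])
		return false;		/* kernel has no mapping either */
	cur_l1[idx] = swapper_pg_dir[idx];	/* copy, then rfi retries */
	return true;
}
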
*/ bctr /* jump into table */ 152: mfdar r11 + mtdar r10 mtctr r11 /* restore ctr reg from DAR */ mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) @@ -537,7 +533,7 @@ start_here: ori r0, r0, STACK_END_MAGIC@l stw r0, 0(r1) li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) lis r6, swapper_pg_dir@ha tophys(r6,r6) @@ -592,6 +588,10 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -625,7 +625,7 @@ start_here: * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by * these mappings is mapped by page tables. */ -initial_mmu: +SYM_FUNC_START_LOCAL(initial_mmu) li r8, 0 mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */ lis r10, MD_TWAM@h @@ -686,7 +686,9 @@ initial_mmu: #endif mtspr SPRN_DER, r8 blr +SYM_FUNC_END(initial_mmu) +#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -708,6 +710,7 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia +#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -728,6 +731,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 +#endif LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA @@ -787,3 +791,4 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi +#endif diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 6c739beb938c..cb2bca76be53 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -18,6 +18,8 @@ #include <linux/init.h> #include <linux/pgtable.h> +#include <linux/linkage.h> + #include <asm/reg.h> #include <asm/page.h> #include <asm/mmu.h> @@ -29,7 +31,6 @@ #include <asm/ptrace.h> #include <asm/bug.h> #include <asm/kvm_book3s_asm.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #include <asm/interrupt.h> @@ -410,44 +411,34 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) */ . = INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_IMISS -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 -#endif + mfspr r0,SPRN_IMISS mfspr r2, SPRN_SDR1 - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER - rlwinm r2, r2, 28, 0xfffff000 -#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -#endif -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + rlwinm r2, r2, 28, 0xfffff000 + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ +#ifdef CONFIG_EXECMEM + rlwinm r3, r0, 4, 0xf + subi r3, r3, (TASK_SIZE >> 28) & 0xf +#endif rlwinm. 
r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ + lwz r2,0(r2) /* get linux-style pte */ + andc. r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ +#ifdef CONFIG_EXECMEM + rlwimi r2, r3, 1, 31, 31 /* userspace ? -> PP lsb */ +#endif ori r1, r1, 0xe06 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 1 : 0 */ + andc r1, r2, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - tlbli r3 + tlbli r0 mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r3 rfi @@ -476,63 +467,62 @@ InstructionAddressInvalid: */ . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + mfspr r0,SPRN_DMISS mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER - rlwinm r2, r2, 28, 0xfffff000 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_PRESENT | _PAGE_ACCESSED - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ + lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - rlwimi r1,r0,32-3,24,24 /* _PAGE_RW -> _PAGE_DIRTY */ - rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ + rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */ + rlwimi r2,r3,2,30,31 /* userspace ? -> PP */ + rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ + andc r1,r2,r1 /* PP = user? rw? 
1: 3: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + b 1b DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -556,35 +546,26 @@ DataAddressInvalid: */ . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: -/* - * r0: scratch - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: scratch - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + mfspr r0,SPRN_DMISS mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER - rlwinm r2, r2, 28, 0xfffff000 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ - lwz r0,0(r2) /* get linux-style pte */ - andc. r1,r1,r0 /* check access & ~permission */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ + lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + rlwimi r2,r3,1,31,31 /* userspace ? -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ - andc r1,r0,r1 /* PP = user? 1: 0 */ + andc r1,r2,r1 /* PP = user? 
1: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -592,26 +573,36 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm r2,r2,0,0,19 /* extract address of pte page */ + b 1b + #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception #endif @@ -688,7 +679,8 @@ hash_page_dsi: mfdar r4 mfsrr0 r5 mfsrr1 r9 - rlwinm r3, r3, 32 - 15, _PAGE_RW /* DSISR_STORE -> _PAGE_RW */ + rlwinm r3, r3, 32 - 15, _PAGE_WRITE /* DSISR_STORE -> _PAGE_WRITE */ + ori r3, r3, _PAGE_PRESENT | _PAGE_READ bl hash_page mfspr r10, SPRN_SPRG_THREAD restore_regs_thread r10 @@ -698,7 +690,7 @@ hash_page_isi: mr r11, r10 mfspr r10, SPRN_SPRG_THREAD save_regs_thread r10 - li r3, 0 + li r3, _PAGE_PRESENT | _PAGE_EXEC lwz r4, SRR0(r10) lwz r9, SRR1(r10) bl hash_page @@ -840,7 +832,7 @@ __secondary_start: lwz r1,TASK_STACK(r1) /* stack */ - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r0,0 tophys(r3,r1) stw r0,0(r3) @@ -877,7 +869,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ -early_hash_table: +SYM_FUNC_START_LOCAL(early_hash_table) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -888,8 +880,9 @@ early_hash_table: ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 blr +SYM_FUNC_END(early_hash_table) -load_up_mmu: +SYM_FUNC_START_LOCAL(load_up_mmu) sync /* Force all PTE updates to finish */ isync tlbia /* Clear all TLB entries */ @@ -918,6 +911,7 @@ BEGIN_MMU_FTR_SECTION LOAD_BAT(7,r3,r4,r5) END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(load_up_mmu) _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ @@ -966,7 +960,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) lis r1,init_thread_union@ha addi r1,r1,init_thread_union@l li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) /* * Do early platform-specific initialization, * and set up the MMU. @@ -1028,7 +1022,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE) * this makes sure it's done. 
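
The 603 miss-handler rewrites above make two related moves: "is this a user access?" is now derived from the EA's segment number measured against TASK_SIZE (the rlwinm/subi pair on r3) instead of a _PAGE_USER software bit, and kernel addresses take the new 2: fallback that lazily copies the pmd entry from swapper_pg_dir into the current table, much like the 8xx FixupPGD earlier in this patch. The user test, restated in C (TASK_SIZE value illustrative):

#include <stdbool.h>
#include <stdint.h>

#define TASK_SIZE 0xc0000000u		/* illustrative */

static bool ea_is_user(uint32_t ea)
{
	/* rlwinm r3,r0,4,0xf ; subi r3,... goes negative for user EAs */
	return (int)((ea >> 28) - (TASK_SIZE >> 28)) < 0;
}
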
* -- Cort */ -clear_bats: +SYM_FUNC_START_LOCAL(clear_bats) li r10,0 mtspr SPRN_DBAT0U,r10 @@ -1072,6 +1066,7 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_IBAT7L,r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +SYM_FUNC_END(clear_bats) _GLOBAL(update_bats) lis r4, 1f@h @@ -1108,15 +1103,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) mtspr SPRN_SRR1, r6 rfi -flush_tlbs: +SYM_FUNC_START_LOCAL(flush_tlbs) lis r10, 0x40 1: addic. r10, r10, -0x1000 tlbie r10 bgt 1b sync blr +SYM_FUNC_END(flush_tlbs) -mmu_off: +SYM_FUNC_START_LOCAL(mmu_off) addi r4, r3, __after_mmu_off - _start mfmsr r3 andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ @@ -1128,9 +1124,10 @@ mmu_off: mtspr SPRN_SRR1,r3 sync rfi +SYM_FUNC_END(mmu_off) /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */ -initial_bats: +SYM_FUNC_START_LOCAL(initial_bats) lis r11,PAGE_OFFSET@h tophys(r8,r11) #ifdef CONFIG_SMP @@ -1146,9 +1143,10 @@ initial_bats: mtspr SPRN_IBAT0U,r11 isync blr +SYM_FUNC_END(initial_bats) #ifdef CONFIG_BOOTX_TEXT -setup_disp_bat: +SYM_FUNC_START_LOCAL(setup_disp_bat) /* * setup the display bat prepared for us in prom.c */ @@ -1164,10 +1162,11 @@ setup_disp_bat: mtspr SPRN_DBAT3L,r8 mtspr SPRN_DBAT3U,r11 blr +SYM_FUNC_END(setup_disp_bat) #endif /* CONFIG_BOOTX_TEXT */ #ifdef CONFIG_PPC_EARLY_DEBUG_CPM -setup_cpm_bat: +SYM_FUNC_START_LOCAL(setup_cpm_bat) lis r8, 0xf000 ori r8, r8, 0x002a mtspr SPRN_DBAT1L, r8 @@ -1177,10 +1176,11 @@ setup_cpm_bat: mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_cpm_bat) #endif #ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO -setup_usbgecko_bat: +SYM_FUNC_START_LOCAL(setup_usbgecko_bat) /* prepare a BAT for early io */ #if defined(CONFIG_GAMECUBE) lis r8, 0x0c00 @@ -1199,6 +1199,7 @@ setup_usbgecko_bat: mtspr SPRN_DBAT1L, r8 mtspr SPRN_DBAT1U, r11 blr +SYM_FUNC_END(setup_usbgecko_bat) #endif .data diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index bb6d5d0fc4ac..0b5c1993809e 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -5,6 +5,7 @@ #include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ #include <asm/kvm_asm.h> #include <asm/kvm_booke_hv_asm.h> +#include <asm/thread_info.h> /* for THREAD_SHIFT */ #ifdef __ASSEMBLY__ @@ -34,7 +35,7 @@ */ #define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4)) -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 #define BOOKE_CLEAR_BTB(reg) \ START_BTB_FLUSH_SECTION \ BTB_FLUSH(reg) \ @@ -84,7 +85,7 @@ END_BTB_FLUSH_SECTION stw r0,GPR0(r1) lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ addi r10, r10, STACK_FRAME_REGS_MARKER@l - stw r10, 8(r1) + stw r10, STACK_INT_FRAME_MARKER(r1) li r10, \trapno stw r10,_TRAP(r1) SAVE_GPRS(3, 8, r1) @@ -99,11 +100,11 @@ END_BTB_FLUSH_SECTION mfspr r10,SPRN_XER addi r2, r2, -THREAD stw r10,_XER(r1) - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .endm .macro prepare_transfer_to_handler -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 andi. r12,r9,MSR_PR bne 777f bl prepare_transfer_to_handler @@ -144,10 +145,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) b transfer_to_syscall /* jump to handler */ .endm -/* To handle the additional exception priority levels on 40x and Book-E +/* To handle the additional exception priority levels on Book-E * processors we allocate a stack per additional priority level. 
* - * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check * * Additionally we reserve a SPRG for each priority level so we can free up a @@ -242,7 +242,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) .macro SAVE_MMU_REGS -#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_PPC_E500 mfspr r0,SPRN_MAS0 stw r0,MAS0(r1) mfspr r0,SPRN_MAS1 @@ -257,7 +257,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mfspr r0,SPRN_MAS7 stw r0,MAS7(r1) #endif /* CONFIG_PHYS_64BIT */ -#endif /* CONFIG_PPC_BOOK3E_MMU */ +#endif /* CONFIG_PPC_E500 */ #ifdef CONFIG_44x mfspr r0,SPRN_MMUCR stw r0,MMUCR(r1) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 2669f80b3a49..a1318ce18d0e 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/spinlock.h> #include <linux/debugfs.h> #include <linux/init.h> @@ -42,16 +43,6 @@ int hw_breakpoint_slots(int type) return 0; /* no instruction breakpoints available */ } -static bool single_step_pending(void) -{ - int i; - - for (i = 0; i < nr_wp_slots(); i++) { - if (current->thread.last_hit_ubp[i]) - return true; - } - return false; -} /* * Install a perf counter breakpoint. @@ -83,7 +74,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * Do not install DABR values if the instruction must be single-stepped. * If so, DABR will be populated in single_step_dabr_instruction(). */ - if (!single_step_pending()) + if (!info->perf_single_step) __set_breakpoint(i, info); return 0; @@ -123,249 +114,6 @@ static bool is_ptrace_bp(struct perf_event *bp) return bp->overflow_handler == ptrace_triggered; } -struct breakpoint { - struct list_head list; - struct perf_event *bp; - bool ptrace_bp; -}; - -static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); -static LIST_HEAD(task_bps); - -static struct breakpoint *alloc_breakpoint(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return ERR_PTR(-ENOMEM); - tmp->bp = bp; - tmp->ptrace_bp = is_ptrace_bp(bp); - return tmp; -} - -static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) -{ - __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; - - bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE); - bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE); - - return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); -} - -static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) -{ - return is_ptrace_bp(bp) ? 
!b->ptrace_bp : b->ptrace_bp; -} - -static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) -{ - return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); -} - -static int task_bps_add(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - list_add(&tmp->list, &task_bps); - return 0; -} - -static void task_bps_remove(struct perf_event *bp) -{ - struct list_head *pos, *q; - - list_for_each_safe(pos, q, &task_bps) { - struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); - - if (tmp->bp == bp) { - list_del(&tmp->list); - kfree(tmp); - break; - } - } -} - -/* - * If any task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. - */ -static bool all_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - - list_for_each_entry(tmp, &task_bps, list) { - if (!can_co_exist(tmp, bp)) - return true; - } - return false; -} - -/* - * If same task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. - */ -static bool same_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - - list_for_each_entry(tmp, &task_bps, list) { - if (tmp->bp->hw.target == bp->hw.target && - !can_co_exist(tmp, bp)) - return true; - } - return false; -} - -static int cpu_bps_add(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - struct breakpoint *tmp; - int i = 0; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) { - cpu_bp[i] = tmp; - break; - } - } - return 0; -} - -static void cpu_bps_remove(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - int i = 0; - - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) - continue; - - if (cpu_bp[i]->bp == bp) { - kfree(cpu_bp[i]); - cpu_bp[i] = NULL; - break; - } - } -} - -static bool cpu_bps_check(int cpu, struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - int i; - - cpu_bp = per_cpu_ptr(cpu_bps, cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) - return true; - } - return false; -} - -static bool all_cpu_bps_check(struct perf_event *bp) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (cpu_bps_check(cpu, bp)) - return true; - } - return false; -} - -/* - * We don't use any locks to serialize accesses to cpu_bps or task_bps - * because are already inside nr_bp_mutex. 
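The reservation bookkeeping deleted above hinged on one predicate: a ptrace breakpoint and a perf breakpoint may not watch overlapping address ranges. Restated as a self-contained sketch, with HW_BREAKPOINT_SIZE assumed to be the 8-byte DABR/DAWR granularity:

	#include <stdbool.h>
	#include <stdint.h>

	#define HW_BP_SIZE 8UL	/* assumption: 8-byte watch granularity */
	#define ALIGN_DOWN_(x, a) ((x) & ~((a) - 1))
	#define ALIGN_UP_(x, a)   (((x) + (a) - 1) & ~((a) - 1))

	static bool ranges_overlap(uint64_t addr1, uint64_t len1,
				   uint64_t addr2, uint64_t len2)
	{
		uint64_t s1 = ALIGN_DOWN_(addr1, HW_BP_SIZE);
		uint64_t e1 = ALIGN_UP_(addr1 + len1, HW_BP_SIZE);
		uint64_t s2 = ALIGN_DOWN_(addr2, HW_BP_SIZE);
		uint64_t e2 = ALIGN_UP_(addr2 + len2, HW_BP_SIZE);

		/* half-open interval intersection, as in bp_addr_range_overlap() */
		return s1 < e2 && e1 > s2;
	}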
- */ -int arch_reserve_bp_slot(struct perf_event *bp) -{ - int ret; - - /* ptrace breakpoint */ - if (is_ptrace_bp(bp)) { - if (all_cpu_bps_check(bp)) - return -ENOSPC; - - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } - - /* perf breakpoint */ - if (is_kernel_addr(bp->attr.bp_addr)) - return 0; - - if (bp->hw.target && bp->cpu == -1) { - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } else if (!bp->hw.target && bp->cpu != -1) { - if (all_task_bps_check(bp)) - return -ENOSPC; - - return cpu_bps_add(bp); - } - - if (same_task_bps_check(bp)) - return -ENOSPC; - - ret = cpu_bps_add(bp); - if (ret) - return ret; - ret = task_bps_add(bp); - if (ret) - cpu_bps_remove(bp); - - return ret; -} - -void arch_release_bp_slot(struct perf_event *bp) -{ - if (!is_kernel_addr(bp->attr.bp_addr)) { - if (bp->hw.target) - task_bps_remove(bp); - if (bp->cpu != -1) - cpu_bps_remove(bp); - } -} - -/* - * Perform cleanup of arch-specific counters during unregistration - * of the perf-event - */ -void arch_unregister_hw_breakpoint(struct perf_event *bp) -{ - /* - * If the breakpoint is unregistered between a hw_breakpoint_handler() - * and the single_step_dabr_instruction(), then cleanup the breakpoint - * restoration variables to prevent dangling pointers. - * FIXME, this should not be using bp->ctx at all! Sayeth peterz. - */ - if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) { - int i; - - for (i = 0; i < nr_wp_slots(); i++) { - if (bp->ctx->task->thread.last_hit_ubp[i] == bp) - bp->ctx->task->thread.last_hit_ubp[i] = NULL; - } - } -} - /* * Check for virtual address in kernel space. */ @@ -472,25 +220,36 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, * Restores the breakpoint on the debug registers. * Invoke this function if it is known that the execution context is * about to change to cause loss of MSR_SE settings. + * + * The perf watchpoint will simply re-trigger once the thread is started again, + * and the watchpoint handler will set up MSR_SE and perf_single_step as + * needed. */ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { struct arch_hw_breakpoint *info; int i; + preempt_disable(); + for (i = 0; i < nr_wp_slots(); i++) { - if (unlikely(tsk->thread.last_hit_ubp[i])) + struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); + + if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) goto reset; } - return; + goto out; reset: regs_set_return_msr(regs, regs->msr & ~MSR_SE); for (i = 0; i < nr_wp_slots(); i++) { info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); __set_breakpoint(i, info); - tsk->thread.last_hit_ubp[i] = NULL; + info->perf_single_step = false; } + +out: + preempt_enable(); } static bool is_larx_stcx_instr(int type) @@ -507,23 +266,22 @@ static bool is_octword_vsx_instr(int type, int size) * We've failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. */ -static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void handler_error(struct perf_event *bp) { WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } -static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void larx_stcx_err(struct perf_event *bp) { printk_ratelimited("Breakpoint hit on instruction that can't be emulated. 
Breakpoint at 0x%lx will be disabled.\n", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, - struct arch_hw_breakpoint **info, int *hit, - ppc_inst_t instr) + int *hit, ppc_inst_t instr) { int i; int stepped; @@ -533,8 +291,9 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - current->thread.last_hit_ubp[i] = bp[i]; - info[i] = NULL; + + counter_arch_bp(bp[i])->perf_single_step = true; + bp[i] = NULL; } regs_set_return_msr(regs, regs->msr | MSR_SE); return false; @@ -545,15 +304,15 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; } return false; } return true; } -static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, +static void handle_p10dd1_spurious_exception(struct perf_event **bp, int *hit, unsigned long ea) { int i; @@ -565,10 +324,14 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, * spurious exception. */ for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + struct arch_hw_breakpoint *info; + + if (!bp[i]) continue; - hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + info = counter_arch_bp(bp[i]); + + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); /* * Ending address of DAWR range is less than starting @@ -598,20 +361,24 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, return; for (i = 0; i < nr_wp_slots(); i++) { - if (info[i]) { + if (bp[i]) { hit[i] = 1; - info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; } } } +/* + * Handle a DABR or DAWR exception. + * + * Called in atomic context. 
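Note the handler below no longer carries a parallel info[] array: a slot's arch state is always reachable from the perf event itself through counter_arch_bp(), which the generic hw_breakpoint API (include/linux/hw_breakpoint.h) defines as a trivial accessor:

	static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
	{
		return &bp->hw.info;
	}

So dropping a slot reduces to bp[i] = NULL, with no second array to keep in sync.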
+ */ int hw_breakpoint_handler(struct die_args *args) { bool err = false; int rc = NOTIFY_STOP; struct perf_event *bp[HBP_NUM_MAX] = { NULL }; struct pt_regs *regs = args->regs; - struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL }; int i; int hit[HBP_NUM_MAX] = {0}; int nr_hit = 0; @@ -619,7 +386,7 @@ int hw_breakpoint_handler(struct die_args *args) ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; - unsigned long ea; + unsigned long ea = 0; /* Disable breakpoints during exception handling */ hw_breakpoint_disable(); @@ -636,18 +403,20 @@ int hw_breakpoint_handler(struct die_args *args) wp_get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { + struct arch_hw_breakpoint *info; + bp[i] = __this_cpu_read(bp_per_reg[i]); if (!bp[i]) continue; - info[i] = counter_arch_bp(bp[i]); - info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; + info = counter_arch_bp(bp[i]); + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (wp_check_constraints(regs, instr, ea, type, size, info[i])) { + if (wp_check_constraints(regs, instr, ea, type, size, info)) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; err = 1; continue; } @@ -666,7 +435,7 @@ int hw_breakpoint_handler(struct die_args *args) /* Workaround for Power10 DD1 */ if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && is_octword_vsx_instr(type, size)) { - handle_p10dd1_spurious_exception(info, hit, ea); + handle_p10dd1_spurious_exception(bp, hit, ea); } else { rc = NOTIFY_DONE; goto out; @@ -681,10 +450,10 @@ int hw_breakpoint_handler(struct die_args *args) */ if (ptrace_bp) { for (i = 0; i < nr_wp_slots(); i++) { - if (!hit[i]) + if (!hit[i] || !is_ptrace_bp(bp[i])) continue; perf_bp_event(bp[i], regs); - info[i] = NULL; + bp[i] = NULL; } rc = NOTIFY_DONE; goto reset; @@ -695,13 +464,13 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - larx_stcx_err(bp[i], info[i]); - info[i] = NULL; + larx_stcx_err(bp[i]); + bp[i] = NULL; } goto reset; } - if (!stepping_handler(regs, bp, info, hit, instr)) + if (!stepping_handler(regs, bp, hit, instr)) goto reset; } @@ -712,15 +481,15 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp[i], regs); } reset: for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + if (!bp[i]) continue; - __set_breakpoint(i, info[i]); + __set_breakpoint(i, counter_arch_bp(bp[i])); } out: @@ -731,28 +500,34 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler); /* * Handle single-step exceptions following a DABR hit. + * + * Called in atomic context. 
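The per-task last_hit_ubp[] array is gone; a pending single step is now recorded in the arch breakpoint itself. Condensed from the hunks above and below into two hypothetical helper functions (names and scaffolding mine, logic from the patch): arm in stepping_handler() when emulate_step() fails, consume in single_step_dabr_instruction() by scanning the per-CPU slots:

	static void arm_pending_step(struct pt_regs *regs, struct perf_event **bp,
				     int *hit)
	{
		int i;

		for (i = 0; i < nr_wp_slots(); i++) {
			if (!hit[i])
				continue;
			/* replaces current->thread.last_hit_ubp[i] = bp[i] */
			counter_arch_bp(bp[i])->perf_single_step = true;
			bp[i] = NULL;	/* leave the slot unarmed until the step */
		}
		regs_set_return_msr(regs, regs->msr | MSR_SE);
	}

	static void consume_pending_step(struct pt_regs *regs)
	{
		int i;

		for (i = 0; i < nr_wp_slots(); i++) {
			struct perf_event *bp = __this_cpu_read(bp_per_reg[i]);
			struct arch_hw_breakpoint *info = bp ? counter_arch_bp(bp) : NULL;

			if (!info || !info->perf_single_step)
				continue;
			if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
				perf_bp_event(bp, regs);  /* trigger-after-execute */
			info->perf_single_step = false;
			__set_breakpoint(i, info);	  /* re-install the slot */
		}
	}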
*/ static int single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; - struct perf_event *bp = NULL; - struct arch_hw_breakpoint *info; - int i; bool found = false; /* * Check if we are single-stepping as a result of a * previous HW Breakpoint exception */ - for (i = 0; i < nr_wp_slots(); i++) { - bp = current->thread.last_hit_ubp[i]; + for (int i = 0; i < nr_wp_slots(); i++) { + struct perf_event *bp; + struct arch_hw_breakpoint *info; + + bp = __this_cpu_read(bp_per_reg[i]); if (!bp) continue; - found = true; info = counter_arch_bp(bp); + if (!info->perf_single_step) + continue; + + found = true; + /* * We shall invoke the user-defined callback function in the * single stepping handler to confirm to 'trigger-after-execute' @@ -760,26 +535,16 @@ static int single_step_dabr_instruction(struct die_args *args) */ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - current->thread.last_hit_ubp[i] = NULL; - } - if (!found) - return NOTIFY_DONE; - - for (i = 0; i < nr_wp_slots(); i++) { - bp = __this_cpu_read(bp_per_reg[i]); - if (!bp) - continue; - - info = counter_arch_bp(bp); - __set_breakpoint(i, info); + info->perf_single_step = false; + __set_breakpoint(i, counter_arch_bp(bp)); } /* * If the process was being single-stepped by ptrace, let the * other single-step actions occur (e.g. generate SIGTRAP). */ - if (test_thread_flag(TIF_SINGLESTEP)) + if (!found || test_thread_flag(TIF_SINGLESTEP)) return NOTIFY_DONE; return NOTIFY_STOP; @@ -788,6 +553,8 @@ NOKPROBE_SYMBOL(single_step_dabr_instruction); /* * Handle debug exception notifications. + * + * Called in atomic context. */ int hw_breakpoint_exceptions_notify( struct notifier_block *unused, unsigned long val, void *data) diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index a74623025f3a..9e51801c4915 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -131,8 +131,13 @@ void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; + int err; - if (__get_user_instr(*instr, (void __user *)regs->nip)) + pagefault_disable(); + err = __get_user_instr(*instr, (void __user *)regs->nip); + pagefault_enable(); + + if (err) return; analyse_instr(&op, regs, *instr); diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 77cd4c5a2d63..e527cd3ef128 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -51,10 +51,9 @@ void arch_cpu_idle(void) * Some power_save functions return with * interrupts enabled, some don't. */ - if (irqs_disabled()) - raw_local_irq_enable(); + if (!irqs_disabled()) + raw_local_irq_disable(); } else { - raw_local_irq_enable(); /* * Go into low thread priority and possibly * low power mode. @@ -98,7 +97,7 @@ void power4_idle(void) /* * Register the sysctl to set/clear powersave_nap. 
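The hunk below drops the hand-rolled "kernel" root table: the sysctl core now accepts a directory path, so registration collapses to a single call. The end state, abbreviated to the fields visible in the hunk:

	#include <linux/sysctl.h>

	static const struct ctl_table powersave_nap_ctl_table[] = {
		{
			.procname	= "powersave-nap",
			.data		= &powersave_nap,
			.mode		= 0644,
			.proc_handler	= proc_dointvec,
		},
	};

	static int __init register_powersave_nap_sysctl(void)
	{
		/* "kernel" names the parent directory directly */
		register_sysctl("kernel", powersave_nap_ctl_table);
		return 0;
	}

The {} sentinel entry disappears as well; register_sysctl() is a macro that sizes the table with ARRAY_SIZE(), so no terminator is needed.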
 */
-static struct ctl_table powersave_nap_ctl_table[] = {
+static const struct ctl_table powersave_nap_ctl_table[] = {
 	{
 		.procname	= "powersave-nap",
 		.data		= &powersave_nap,
@@ -106,21 +105,12 @@ static struct ctl_table powersave_nap_ctl_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{}
-};
-static struct ctl_table powersave_nap_sysctl_root[] = {
-	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= powersave_nap_ctl_table,
-	},
-	{}
 };

 static int __init register_powersave_nap_sysctl(void)
 {
-	register_sysctl_table(powersave_nap_sysctl_root);
+	register_sysctl("kernel", powersave_nap_ctl_table);
 	return 0;
 }
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_64e.S
index cc008de58b05..0fc680e03dee 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_64e.S
@@ -2,7 +2,7 @@
 /*
  * Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
  *
- * Generic idle routine for Book3E processors
+ * Generic idle routine for 64 bits e500 processors
  */

 #include <linux/threads.h>
@@ -16,8 +16,6 @@
 #include <asm/hw_irq.h>

 /* 64-bit version only for now */
-#ifdef CONFIG_PPC64
-
 .macro BOOK3E_IDLE name loop
 _GLOBAL(\name)
 	/* Save LR for later */
@@ -77,7 +75,7 @@ _GLOBAL(\name)

 .macro BOOK3E_IDLE_LOOP
 1:
-	PPC_WAIT(0)
+	PPC_WAIT_v203
 	b	1b
 .endm

@@ -98,6 +96,4 @@ epapr_ev_idle_start:

 BOOK3E_IDLE epapr_ev_idle EPAPR_EV_IDLE_LOOP

-BOOK3E_IDLE book3e_idle BOOK3E_IDLE_LOOP
-
-#endif /* CONFIG_PPC64 */
+BOOK3E_IDLE e500_idle BOOK3E_IDLE_LOOP
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_85xx.S
index 9e1bc4502c50..9e1bc4502c50 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_85xx.S
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
index 957abd592075..b7029beed847 100644
--- a/arch/powerpc/kernel/ima_arch.c
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -23,9 +23,9 @@ bool arch_ima_get_secureboot(void)
 * is not enabled.
*/ static const char *const secure_rules[] = { - "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", #ifndef CONFIG_MODULE_SIG - "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=MODULE_CHECK appraise_type=imasig|modsig", #endif NULL }; @@ -49,9 +49,9 @@ static const char *const trusted_rules[] = { static const char *const secure_and_trusted_rules[] = { "measure func=KEXEC_KERNEL_CHECK template=ima-modsig", "measure func=MODULE_CHECK template=ima-modsig", - "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig", #ifndef CONFIG_MODULE_SIG - "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig", + "appraise func=MODULE_CHECK appraise_type=imasig|modsig", #endif NULL }; diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 784ea3289c84..e0c681d0b076 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -3,6 +3,7 @@ #include <linux/context_tracking.h> #include <linux/err.h> #include <linux/compat.h> +#include <linux/rseq.h> #include <linux/sched/debug.h> /* for show_regs */ #include <asm/kup.h> @@ -24,7 +25,9 @@ unsigned long global_dbcr0[NR_CPUS]; #endif -typedef long (*syscall_fn)(long, long, long, long, long, long); +#if defined(CONFIG_PREEMPT_DYNAMIC) +DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +#endif #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); @@ -52,16 +55,18 @@ static inline bool exit_must_hard_disable(void) */ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) { + bool must_hard_disable = (exit_must_hard_disable() || !restartable); + /* This must be done with RI=1 because tracing may touch vmaps */ trace_hardirqs_on(); - if (exit_must_hard_disable() || !restartable) + if (must_hard_disable) __hard_EE_RI_disable(); #ifdef CONFIG_PPC64 /* This pattern matches prep_irq_for_idle */ if (unlikely(lazy_irq_pending_nocheck())) { - if (exit_must_hard_disable() || !restartable) { + if (must_hard_disable) { local_paca->irq_happened |= PACA_IRQ_HARD_DIS; __hard_RI_enable(); } @@ -73,165 +78,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable) return true; } -/* Has to run notrace because it is entered not completely "reconciled" */ -notrace long system_call_exception(long r3, long r4, long r5, - long r6, long r7, long r8, - unsigned long r0, struct pt_regs *regs) -{ - syscall_fn f; - - kuap_lock(); - - regs->orig_gpr3 = r3; - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); - - trace_hardirqs_off(); /* finish reconciling */ - - CT_WARN_ON(ct_state() == CONTEXT_KERNEL); - user_exit_irqoff(); - - BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!(regs->msr & MSR_PR)); - BUG_ON(arch_irq_disabled_regs(regs)); - -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_PKEY)) { - unsigned long amr, iamr; - bool flush_needed = false; - /* - * When entering from userspace we mostly have the AMR/IAMR - * different from kernel default values. Hence don't compare. 
- */ - amr = mfspr(SPRN_AMR); - iamr = mfspr(SPRN_IAMR); - regs->amr = amr; - regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { - mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); - flush_needed = true; - } - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { - mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); - flush_needed = true; - } - if (flush_needed) - isync(); - } else -#endif - kuap_assert_locked(); - - booke_restore_dbcr0(); - - account_cpu_user_entry(); - - account_stolen_time(); - - /* - * This is not required for the syscall exit path, but makes the - * stack frame look nicer. If this was initialised in the first stack - * frame, or if the unwinder was taught the first stack frame always - * returns to user with IRQS_ENABLED, this store could be avoided! - */ - irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); - - /* - * If system call is called with TM active, set _TIF_RESTOREALL to - * prevent RFSCV being used to return to userspace, because POWER9 - * TM implementation has problems with this instruction returning to - * transactional state. Final register values are not relevant because - * the transaction will be aborted upon return anyway. Or in the case - * of unsupported_scv SIGILL fault, the return state does not much - * matter because it's an edge case. - */ - if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && - unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); - - /* - * If the system call was made with a transaction active, doom it and - * return without performing the system call. Unless it was an - * unsupported scv vector, in which case it's treated like an illegal - * instruction. - */ -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && - !trap_is_unsupported_scv(regs)) { - /* Enable TM in the kernel, and disable EE (for scv) */ - hard_irq_disable(); - mtmsr(mfmsr() | MSR_TM); - - /* tabort, this dooms the transaction, nothing else */ - asm volatile(".long 0x7c00071d | ((%0) << 16)" - :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); - - /* - * Userspace will never see the return value. Execution will - * resume after the tbegin. of the aborted transaction with the - * checkpointed register state. A context switch could occur - * or signal delivered to the process before resuming the - * doomed transaction context, but that should all be handled - * as expected. - */ - return -ENOSYS; - } -#endif // CONFIG_PPC_TRANSACTIONAL_MEM - - local_irq_enable(); - - if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - /* - * We use the return value of do_syscall_trace_enter() as the - * syscall number. If the syscall was rejected for any reason - * do_syscall_trace_enter() returns an invalid syscall number - * and the test against NR_syscalls will fail and the return - * value to be used is in regs->gpr[3]. - */ - r0 = do_syscall_trace_enter(regs); - if (unlikely(r0 >= NR_syscalls)) - return regs->gpr[3]; - r3 = regs->gpr[3]; - r4 = regs->gpr[4]; - r5 = regs->gpr[5]; - r6 = regs->gpr[6]; - r7 = regs->gpr[7]; - r8 = regs->gpr[8]; - - } else if (unlikely(r0 >= NR_syscalls)) { - if (unlikely(trap_is_unsupported_scv(regs))) { - /* Unsupported scv vector */ - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return regs->gpr[3]; - } - return -ENOSYS; - } - - /* May be faster to do array_index_nospec? 
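The removed comment above asks whether array_index_nospec() would beat barrier_nospec(). For reference, a sketch of that alternative dispatch, which is not what the code does here; it keeps the barrier. The syscall_fn typedef matches the one removed earlier in this diff:

	#include <linux/nospec.h>

	typedef long (*syscall_fn)(long, long, long, long, long, long);
	extern const unsigned long sys_call_table[];	/* used via (void *) above */

	static long dispatch_nospec(unsigned long r0, long r3, long r4, long r5,
				    long r6, long r7, long r8)
	{
		syscall_fn f;

		if (unlikely(r0 >= NR_syscalls))
			return -ENOSYS;
		/* clamp the index under speculation rather than fence after the check */
		r0 = array_index_nospec(r0, NR_syscalls);
		f = (void *)sys_call_table[r0];
		return f(r3, r4, r5, r6, r7, r8);
	}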
*/ - barrier_nospec(); - - if (unlikely(is_compat_task())) { - f = (void *)compat_sys_call_table[r0]; - - r3 &= 0x00000000ffffffffULL; - r4 &= 0x00000000ffffffffULL; - r5 &= 0x00000000ffffffffULL; - r6 &= 0x00000000ffffffffULL; - r7 &= 0x00000000ffffffffULL; - r8 &= 0x00000000ffffffffULL; - - } else { - f = (void *)sys_call_table[r0]; - } - - return f(r3, r4, r5, r6, r7, r8); -} - static notrace void booke_load_dbcr0(void) { #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -254,7 +100,7 @@ static notrace void booke_load_dbcr0(void) #endif } -static void check_return_regs_valid(struct pt_regs *regs) +static notrace void check_return_regs_valid(struct pt_regs *regs) { #ifdef CONFIG_PPC_BOOK3S_64 unsigned long trap, srr0, srr1; @@ -284,7 +130,7 @@ static void check_return_regs_valid(struct pt_regs *regs) case 0x1600: case 0x1800: validp = &local_paca->hsrr_valid; - if (!*validp) + if (!READ_ONCE(*validp)) return; srr0 = mfspr(SPRN_HSRR0); @@ -294,7 +140,7 @@ static void check_return_regs_valid(struct pt_regs *regs) break; default: validp = &local_paca->srr_valid; - if (!*validp) + if (!READ_ONCE(*validp)) return; srr0 = mfspr(SPRN_SRR0); @@ -320,19 +166,17 @@ static void check_return_regs_valid(struct pt_regs *regs) * such things will get caught most of the time, statistically * enough to be able to get a warning out. */ - barrier(); - - if (!*validp) + if (!READ_ONCE(*validp)) return; - if (!warned) { - warned = true; + if (!data_race(warned)) { + data_race(warned = true); printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip); printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr); show_regs(regs); } - *validp = 0; /* fixup */ + WRITE_ONCE(*validp, 0); /* fixup */ #endif } @@ -345,7 +189,7 @@ again: ti_flags = read_thread_flags(); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); - if (ti_flags & _TIF_NEED_RESCHED) { + if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) { schedule(); } else { /* @@ -426,7 +270,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, unsigned long ret = 0; bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); kuap_assert_locked(); @@ -504,7 +348,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) BUG_ON(regs_is_unrecoverable(regs)); BUG_ON(arch_irq_disabled_regs(regs)); - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); /* * We don't need to restore AMR on the way back to userspace for KUAP. @@ -527,7 +371,6 @@ void preempt_schedule_irq(void); notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) { - unsigned long flags; unsigned long ret = 0; unsigned long kuap; bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE; @@ -535,21 +378,29 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) if (regs_is_unrecoverable(regs)) unrecoverable_exception(regs); /* - * CT_WARN_ON comes here via program_check_exception, - * so avoid recursion. + * CT_WARN_ON comes here via program_check_exception, so avoid + * recursion. + * + * Skip the assertion on PMIs on 64e to work around a problem caused + * by NMI PMIs incorrectly taking this interrupt return path, it's + * possible for this to hit after interrupt exit to user switches + * context to user. 
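interrupt_exit_kernel_prepare() below swaps the compile-time IS_ENABLED(CONFIG_PREEMPT) test for need_irq_preemption(). Its definition falls outside the hunks shown; given the sk_dynamic_irqentry_exit_cond_resched static key added earlier in this file, it plausibly reads something like:

	/* hedged sketch; the real definition is not in this diff */
	static inline bool need_irq_preemption(void)
	{
	#ifdef CONFIG_PREEMPT_DYNAMIC
		/* flipped at runtime by preempt=none/voluntary/full */
		return static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched);
	#else
		return IS_ENABLED(CONFIG_PREEMPTION);
	#endif
	}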
See also the comment in the performance monitor + * handler in exceptions-64e.S */ - if (TRAP(regs) != INTERRUPT_PROGRAM) - CT_WARN_ON(ct_state() == CONTEXT_USER); + if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) && + TRAP(regs) != INTERRUPT_PROGRAM && + TRAP(regs) != INTERRUPT_PERFMON) + CT_WARN_ON(ct_state() == CT_STATE_USER); kuap = kuap_get_and_assert_locked(); - local_irq_save(flags); + local_irq_disable(); if (!arch_irq_disabled_regs(regs)) { /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: - if (IS_ENABLED(CONFIG_PREEMPT)) { + if (need_irq_preemption()) { /* Return to preemptible kernel context */ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) @@ -592,16 +443,6 @@ again: if (unlikely(stack_store)) __hard_EE_RI_disable(); - /* - * Returning to a kernel context with local irqs disabled. - * Here, if EE was enabled in the interrupted context, enable - * it on return as well. A problem exists here where a soft - * masked interrupt may have cleared MSR[EE] and set HARD_DIS - * here, and it will still exist on return to the caller. This - * will be resolved by the masked interrupt firing again. - */ - if (regs->msr & MSR_EE) - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; #endif /* CONFIG_PPC64 */ } diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index ce25b28cf418..1ad059a9e2fe 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -13,16 +13,6 @@ #include <asm/ppc_asm.h> #include <asm/ptrace.h> - .section ".toc","aw" -SYS_CALL_TABLE: - .tc sys_call_table[TC],sys_call_table - -#ifdef CONFIG_COMPAT -COMPAT_SYS_CALL_TABLE: - .tc compat_sys_call_table[TC],compat_sys_call_table -#endif - .previous - .align 7 .macro DEBUG_SRR_VALID srr @@ -62,21 +52,17 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) mr r10,r1 ld r1,PACAKSAVE(r13) std r10,0(r1) - std r11,_NIP(r1) + std r11,_LINK(r1) + std r11,_NIP(r1) /* Saved LR is also the next instruction */ std r12,_MSR(r1) std r0,GPR0(r1) std r10,GPR1(r1) std r2,GPR2(r1) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() mfcr r12 li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) + /* Save syscall parameters in r3-r8 */ + SAVE_GPRS(3, 8, r1) /* Zero r9-r12, this should only be required when restoring all GPRs */ std r11,GPR9(r1) std r11,GPR10(r1) @@ -85,15 +71,17 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name) std r9,GPR13(r1) SAVE_NVGPRS(r1) std r11,_XER(r1) - std r11,_LINK(r1) std r11,_CTR(r1) li r11,\trapnr std r11,_TRAP(r1) std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ + std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ + /* Calling convention has r3 = regs, r4 = orig r0 */ + addi r3,r1,STACK_INT_FRAME_REGS + mr r4,r0 BEGIN_FTR_SECTION HMT_MEDIUM @@ -108,14 +96,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * but this is the best we can do. */ - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. 
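With r3 = regs and r4 = orig r0 set up above, the C entry point (moved out to the new syscall.c in this series, not shown in this diff) no longer receives the six syscall arguments in registers and presumably reduces to the prototype below, pulling arguments out of the saved pt_regs. The removed convention passed r9 = orig r0 and r10 = regs instead.

	/* hedged: the syscall.c side of the r3/r4 convention set up above */
	notrace long system_call_exception(struct pt_regs *regs, unsigned long r0);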
+ */ + SANITIZE_SYSCALL_GPRS() + bl CFUNC(system_call_exception) .Lsyscall_vectored_\name\()_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,1 /* scv */ - bl syscall_exit_prepare + bl CFUNC(syscall_exit_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ .Lsyscall_vectored_\name\()_rst_start: lbz r11,PACAIRQHAPPENED(r13) @@ -138,6 +129,7 @@ BEGIN_FTR_SECTION HMT_MEDIUM_LOW END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_vectored_\name\()_restore_regs @@ -148,17 +140,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) /* Could zero these as per ABI, but we may consider a stricter ABI * which preserves these if libc implementations can benefit, so * restore them for now until further measurement is done. */ - ld r0,GPR0(r1) - ld r4,GPR4(r1) - ld r5,GPR5(r1) - ld r6,GPR6(r1) - ld r7,GPR7(r1) - ld r8,GPR8(r1) + REST_GPR(0, r1) + REST_GPRS(4, 8, r1) /* Zero volatile regs that may contain sensitive kernel data */ - li r9,0 - li r10,0 - li r11,0 - li r12,0 + ZEROIZE_GPRS(9, 12) mtspr SPRN_XER,r0 /* @@ -180,8 +165,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r4,_LINK(r1) ld r5,_XER(r1) - REST_NVGPRS(r1) - ld r0,GPR0(r1) + HANDLER_RESTORE_NVGPRS() + REST_GPR(0, r1) mtcr r2 mtctr r3 mtlr r4 @@ -195,12 +180,12 @@ syscall_vectored_\name\()_restart: _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl syscall_exit_restart + bl CFUNC(syscall_exit_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_vectored_\name\()_rst_start 1: @@ -240,21 +225,16 @@ _ASM_NOKPROBE_SYMBOL(system_call_common) std r0,GPR0(r1) std r10,GPR1(r1) std r2,GPR2(r1) -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 START_BTB_FLUSH_SECTION BTB_FLUSH(r10) END_BTB_FLUSH_SECTION #endif - ld r2,PACATOC(r13) + LOAD_PACA_TOC() mfcr r12 li r11,0 - /* Can we avoid saving r3-r8 in common case? */ - std r3,GPR3(r1) - std r4,GPR4(r1) - std r5,GPR5(r1) - std r6,GPR6(r1) - std r7,GPR7(r1) - std r8,GPR8(r1) + /* Save syscall parameters in r3-r8 */ + SAVE_GPRS(3, 8, r1) /* Zero r9-r12, this should only be required when restoring all GPRs */ std r11,GPR9(r1) std r11,GPR10(r1) @@ -275,9 +255,12 @@ END_BTB_FLUSH_SECTION std r10,_LINK(r1) std r11,_TRAP(r1) std r12,_CCR(r1) - addi r10,r1,STACK_FRAME_OVERHEAD - ld r11,exception_marker@toc(r2) - std r11,-16(r10) /* "regshere" marker */ + std r3,ORIG_GPR3(r1) + LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER) + std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */ + /* Calling convention has r3 = regs, r4 = orig r0 */ + addi r3,r1,STACK_INT_FRAME_REGS + mr r4,r0 #ifdef CONFIG_PPC_BOOK3S li r11,1 @@ -298,14 +281,17 @@ END_BTB_FLUSH_SECTION wrteei 1 #endif - /* Calling convention has r9 = orig r0, r10 = regs */ - mr r9,r0 - bl system_call_exception + /* + * Zero user registers to prevent influencing speculative execution + * state of kernel code. + */ + SANITIZE_SYSCALL_GPRS() + bl CFUNC(system_call_exception) .Lsyscall_exit: - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r5,0 /* !scv */ - bl syscall_exit_prepare + bl CFUNC(syscall_exit_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ #ifdef CONFIG_PPC_BOOK3S .Lsyscall_rst_start: @@ -340,19 +326,12 @@ BEGIN_FTR_SECTION stdcx. 
r0,0,r1 /* to clear the reservation */ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) + SANITIZE_RESTORE_NVGPRS() cmpdi r3,0 bne .Lsyscall_restore_regs /* Zero volatile regs that may contain sensitive kernel data */ - li r0,0 - li r4,0 - li r5,0 - li r6,0 - li r7,0 - li r8,0 - li r9,0 - li r10,0 - li r11,0 - li r12,0 + ZEROIZE_GPR(0) + ZEROIZE_GPRS(4, 12) mtctr r0 mtspr SPRN_XER,r0 .Lsyscall_restore_regs_cont: @@ -375,10 +354,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .Lsyscall_restore_regs: ld r3,_CTR(r1) ld r4,_XER(r1) - REST_NVGPRS(r1) + HANDLER_RESTORE_NVGPRS() mtctr r3 mtspr SPRN_XER,r4 - ld r0,GPR0(r1) + REST_GPR(0, r1) REST_GPRS(4, 12, r1) b .Lsyscall_restore_regs_cont .Lsyscall_rst_end: @@ -388,12 +367,12 @@ syscall_restart: _ASM_NOKPROBE_SYMBOL(syscall_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) + LOAD_PACA_TOC() ld r3,RESULT(r1) - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl syscall_exit_restart + bl CFUNC(syscall_exit_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_rst_start 1: @@ -421,8 +400,8 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) andi. r0,r5,MSR_RI li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return_srr - addi r3,r1,STACK_FRAME_OVERHEAD - bl unrecoverable_exception + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(unrecoverable_exception) b . /* should not get here */ #else bne .Lfast_user_interrupt_return_srr @@ -439,11 +418,13 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) beq interrupt_return_\srr\()_kernel interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_user_prepare + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(interrupt_exit_user_prepare) +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS cmpdi r3,0 bne- .Lrestore_nvgprs_\srr .Lrestore_nvgprs_\srr\()_cont: +#endif std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ #ifdef CONFIG_PPC_BOOK3S .Linterrupt_return_\srr\()_user_rst_start: @@ -457,6 +438,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS .Lfast_user_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr lbz r4,PACASRR_VALID(r13) @@ -526,20 +508,22 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) b . 
/* prevent speculative execution */ .Linterrupt_return_\srr\()_user_rst_end: +#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS .Lrestore_nvgprs_\srr\(): REST_NVGPRS(r1) b .Lrestore_nvgprs_\srr\()_cont +#endif #ifdef CONFIG_PPC_BOOK3S interrupt_return_\srr\()_user_restart: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) - addi r3,r1,STACK_FRAME_OVERHEAD + LOAD_PACA_TOC() + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl interrupt_exit_user_restart + bl CFUNC(interrupt_exit_user_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_user_rst_start 1: @@ -551,25 +535,74 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr .balign IFETCH_ALIGN_BYTES interrupt_return_\srr\()_kernel: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) - addi r3,r1,STACK_FRAME_OVERHEAD - bl interrupt_exit_kernel_prepare + addi r3,r1,STACK_INT_FRAME_REGS + bl CFUNC(interrupt_exit_kernel_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ .Linterrupt_return_\srr\()_kernel_rst_start: ld r11,SOFTE(r1) cmpwi r11,IRQS_ENABLED stb r11,PACAIRQSOFTMASK(r13) - bne 1f + beq .Linterrupt_return_\srr\()_soft_enabled + + /* + * Returning to soft-disabled context. + * Check if a MUST_HARD_MASK interrupt has become pending, in which + * case we need to disable MSR[EE] in the return context. + * + * The MSR[EE] check catches among other things the short incoherency + * in hard_irq_disable() between clearing MSR[EE] and setting + * PACA_IRQ_HARD_DIS. + */ + ld r12,_MSR(r1) + andi. r10,r12,MSR_EE + beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled + lbz r11,PACAIRQHAPPENED(r13) + andi. r10,r11,PACA_IRQ_MUST_HARD_MASK + bne 1f // HARD_MASK is pending + // No HARD_MASK pending, clear possible HARD_DIS set by interrupt + andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l + stb r11,PACAIRQHAPPENED(r13) + b .Lfast_kernel_interrupt_return_\srr\() + + +1: /* Must clear MSR_EE from _MSR */ +#ifdef CONFIG_PPC_BOOK3S + li r10,0 + /* Clear valid before changing _MSR */ + .ifc \srr,srr + stb r10,PACASRR_VALID(r13) + .else + stb r10,PACAHSRR_VALID(r13) + .endif +#endif + xori r12,r12,MSR_EE + std r12,_MSR(r1) + b .Lfast_kernel_interrupt_return_\srr\() + +.Linterrupt_return_\srr\()_soft_enabled: + /* + * In the soft-enabled case, need to double-check that we have no + * pending interrupts that might have come in before we reached the + * restart section of code, and restart the exit so those can be + * handled. + * + * If there are none, it is be possible that the interrupt still + * has PACA_IRQ_HARD_DIS set, which needs to be cleared for the + * interrupted context. This clear will not clobber a new pending + * interrupt coming in, because we're in the restart section, so + * such would return to the restart location. + */ #ifdef CONFIG_PPC_BOOK3S lbz r11,PACAIRQHAPPENED(r13) andi. 
r11,r11,(~PACA_IRQ_HARD_DIS)@l bne- interrupt_return_\srr\()_kernel_restart #endif li r11,0 - stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS -1: + stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS .Lfast_kernel_interrupt_return_\srr\(): + SANITIZE_RESTORE_NVGPRS() cmpdi cr1,r3,0 #ifdef CONFIG_PPC_BOOK3S .ifc \srr,srr @@ -619,10 +652,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mtspr SPRN_XER,r5 /* - * Leaving a stale exception_marker on the stack can confuse + * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse * the reliable stack unwinder later on. Clear it. */ - std r0,STACK_FRAME_OVERHEAD-16(r1) + std r0,STACK_INT_FRAME_MARKER(r1) REST_GPRS(2, 5, r1) @@ -668,11 +701,11 @@ interrupt_return_\srr\()_kernel_restart: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) GET_PACA(r13) ld r1,PACA_EXIT_SAVE_R1(r13) - ld r2,PACATOC(r13) - addi r3,r1,STACK_FRAME_OVERHEAD + LOAD_PACA_TOC() + addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl interrupt_exit_kernel_restart + bl CFUNC(interrupt_exit_kernel_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_kernel_rst_start 1: @@ -694,21 +727,20 @@ DEFINE_FIXED_SYMBOL(__end_soft_masked, text) #ifdef CONFIG_PPC_BOOK3S _GLOBAL(ret_from_fork_scv) - bl schedule_tail - REST_NVGPRS(r1) + bl CFUNC(schedule_tail) + HANDLER_RESTORE_NVGPRS() li r3,0 /* fork() return value */ b .Lsyscall_vectored_common_exit #endif _GLOBAL(ret_from_fork) - bl schedule_tail - REST_NVGPRS(r1) + bl CFUNC(schedule_tail) + HANDLER_RESTORE_NVGPRS() li r3,0 /* fork() return value */ b .Lsyscall_exit -_GLOBAL(ret_from_kernel_thread) - bl schedule_tail - REST_NVGPRS(r1) +_GLOBAL(ret_from_kernel_user_thread) + bl CFUNC(schedule_tail) mtctr r14 mr r3,r15 #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -716,4 +748,25 @@ _GLOBAL(ret_from_kernel_thread) #endif bctrl li r3,0 + /* + * It does not matter whether this returns via the scv or sc path + * because it returns as execve() and therefore has no calling ABI + * (i.e., it sets registers according to the exec()ed entry point). + */ b .Lsyscall_exit + +_GLOBAL(start_kernel_thread) + bl CFUNC(schedule_tail) + mtctr r14 + mr r3,r15 +#ifdef CONFIG_PPC64_ELF_ABI_V2 + mr r12,r14 +#endif + bctrl + /* + * This must not return. We actually want to BUG here, not WARN, + * because BUG will exit the process which is what the kernel thread + * should have done, which may give some hope of continuing. + */ +100: trap + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c deleted file mode 100644 index c877f074d174..000000000000 --- a/arch/powerpc/kernel/io-workarounds.c +++ /dev/null @@ -1,197 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Support PCI IO workaround - * - * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * IBM, Corp. 
- * (C) Copyright 2007-2008 TOSHIBA CORPORATION - */ -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/sched/mm.h> /* for init_mm */ -#include <linux/pgtable.h> - -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/ppc-pci.h> -#include <asm/io-workarounds.h> -#include <asm/pte-walk.h> - - -#define IOWA_MAX_BUS 8 - -static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; -static unsigned int iowa_bus_count; - -static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) -{ - int i, j; - struct resource *res; - unsigned long vstart, vend; - - for (i = 0; i < iowa_bus_count; i++) { - struct iowa_bus *bus = &iowa_busses[i]; - struct pci_controller *phb = bus->phb; - - if (vaddr) { - vstart = (unsigned long)phb->io_base_virt; - vend = vstart + phb->pci_io_size - 1; - if ((vaddr >= vstart) && (vaddr <= vend)) - return bus; - } - - if (paddr) - for (j = 0; j < 3; j++) { - res = &phb->mem_resources[j]; - if (paddr >= res->start && paddr <= res->end) - return bus; - } - } - - return NULL; -} - -#ifdef CONFIG_PPC_INDIRECT_MMIO -struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) -{ - struct iowa_bus *bus; - int token; - - token = PCI_GET_ADDR_TOKEN(addr); - - if (token && token <= iowa_bus_count) - bus = &iowa_busses[token - 1]; - else { - unsigned long vaddr, paddr; - - vaddr = (unsigned long)PCI_FIX_ADDR(addr); - if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) - return NULL; - - paddr = ppc_find_vmap_phys(vaddr); - - bus = iowa_pci_find(vaddr, paddr); - - if (bus == NULL) - return NULL; - } - - return bus; -} -#else /* CONFIG_PPC_INDIRECT_MMIO */ -struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) -{ - return NULL; -} -#endif /* !CONFIG_PPC_INDIRECT_MMIO */ - -#ifdef CONFIG_PPC_INDIRECT_PIO -struct iowa_bus *iowa_pio_find_bus(unsigned long port) -{ - unsigned long vaddr = (unsigned long)pci_io_base + port; - return iowa_pci_find(vaddr, 0); -} -#else -struct iowa_bus *iowa_pio_find_bus(unsigned long port) -{ - return NULL; -} -#endif - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ -static ret iowa_##name at \ -{ \ - struct iowa_bus *bus; \ - bus = iowa_##space##_find_bus(aa); \ - if (bus && bus->ops && bus->ops->name) \ - return bus->ops->name al; \ - return __do_##name al; \ -} - -#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ -static void iowa_##name at \ -{ \ - struct iowa_bus *bus; \ - bus = iowa_##space##_find_bus(aa); \ - if (bus && bus->ops && bus->ops->name) { \ - bus->ops->name al; \ - return; \ - } \ - __do_##name al; \ -} - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -static const struct ppc_pci_io iowa_pci_io = { - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, -#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -}; - -#ifdef CONFIG_PPC_INDIRECT_MMIO -void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, - pgprot_t prot, void *caller) -{ - struct iowa_bus *bus; - void __iomem *res = __ioremap_caller(addr, size, prot, caller); - int busno; - - bus = iowa_pci_find(0, (unsigned long)addr); - if (bus != NULL) { - busno = bus - iowa_busses; - PCI_SET_ADDR_TOKEN(res, busno + 1); - } - return res; -} -#endif /* !CONFIG_PPC_INDIRECT_MMIO */ - -bool io_workaround_inited; - -/* Enable IO workaround */ -static void io_workaround_init(void) -{ - if (io_workaround_inited) - return; - ppc_pci_io = iowa_pci_io; - io_workaround_inited = true; -} - -/* Register new bus to 
support workaround */ -void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops, - int (*initfunc)(struct iowa_bus *, void *), void *data) -{ - struct iowa_bus *bus; - struct device_node *np = phb->dn; - - io_workaround_init(); - - if (iowa_bus_count >= IOWA_MAX_BUS) { - pr_err("IOWA:Too many pci bridges, " - "workarounds disabled for %pOF\n", np); - return; - } - - bus = &iowa_busses[iowa_bus_count]; - bus->phb = phb; - bus->ops = ops; - bus->private = data; - - if (initfunc) - if ((*initfunc)(bus, data)) - return; - - iowa_bus_count++; - - pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np); -} - diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 2f29b7d432de..bcc201c01514 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -31,13 +31,14 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count) if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - tmp = *port; + tmp = *(const volatile u8 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } EXPORT_SYMBOL(_insb); @@ -47,75 +48,80 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count) if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - *port = *tbuf++; + *(volatile u8 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } EXPORT_SYMBOL(_outsb); -void _insw_ns(const volatile u16 __iomem *port, void *buf, long count) +void _insw(const volatile u16 __iomem *port, void *buf, long count) { u16 *tbuf = buf; u16 tmp; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - tmp = *port; + tmp = *(const volatile u16 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } -EXPORT_SYMBOL(_insw_ns); +EXPORT_SYMBOL(_insw); -void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) +void _outsw(volatile u16 __iomem *port, const void *buf, long count) { const u16 *tbuf = buf; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - *port = *tbuf++; + *(volatile u16 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } -EXPORT_SYMBOL(_outsw_ns); +EXPORT_SYMBOL(_outsw); -void _insl_ns(const volatile u32 __iomem *port, void *buf, long count) +void _insl(const volatile u32 __iomem *port, void *buf, long count) { u32 *tbuf = buf; u32 tmp; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - tmp = *port; + tmp = *(const volatile u32 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } -EXPORT_SYMBOL(_insl_ns); +EXPORT_SYMBOL(_insl); -void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) +void _outsl(volatile u32 __iomem *port, const void *buf, long count) { const u32 *tbuf = buf; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { - *port = *tbuf++; + *(volatile u32 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } -EXPORT_SYMBOL(_outsl_ns); +EXPORT_SYMBOL(_outsl); #define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) @@ -127,7 +133,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n) lc |= lc << 8; lc |= lc << 16; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && !IO_CHECK_ALIGN(p, 4)) { *((volatile u8 *)p) = c; p++; @@ -143,7 
+149,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n) p++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memset_io); @@ -152,7 +158,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, { void *vsrc = (void __force *) src; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) { *((u8 *)dest) = *((volatile u8 *)vsrc); eieio(); @@ -174,7 +180,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, dest++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memcpy_fromio); @@ -182,7 +188,7 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) { void *vdest = (void __force *) dest; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) { *((volatile u8 *)vdest) = *((u8 *)src); src++; @@ -201,6 +207,6 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) vdest++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memcpy_toio); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 7e56ddb3e0b9..244eb4857e7f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/dma-mapping.h> #include <linux/bitmap.h> #include <linux/iommu-helper.h> @@ -26,6 +27,7 @@ #include <linux/iommu.h> #include <linux/sched.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/iommu.h> #include <asm/pci-bridge.h> @@ -35,6 +37,7 @@ #include <asm/vio.h> #include <asm/tce.h> #include <asm/mmu_context.h> +#include <asm/ppc-pci.h> #define DBG(...) @@ -67,11 +70,9 @@ static void iommu_debugfs_add(struct iommu_table *tbl) static void iommu_debugfs_del(struct iommu_table *tbl) { char name[10]; - struct dentry *liobn_entry; sprintf(name, "%08lx", tbl->it_index); - liobn_entry = debugfs_lookup(name, iommu_debugfs_dir); - debugfs_remove(liobn_entry); + debugfs_lookup_and_remove(name, iommu_debugfs_dir); } #else static void iommu_debugfs_add(struct iommu_table *tbl){} @@ -173,17 +174,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb, return 0; } -static struct notifier_block fail_iommu_bus_notifier = { +/* + * PCI and VIO buses need separate notifier_block structs, since they're linked + * list nodes. Sharing a notifier_block would mean that any notifiers later + * registered for PCI buses would also get called by VIO buses and vice versa. 
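The constraint described above is structural: a notifier_block is itself the chain link, so one instance can only ever sit on one notifier chain. From include/linux/notifier.h:

	typedef int (*notifier_fn_t)(struct notifier_block *nb,
				     unsigned long action, void *data);

	struct notifier_block {
		notifier_fn_t notifier_call;
		struct notifier_block __rcu *next;	/* the chain linkage */
		int priority;
	};

Registering the same block on both the PCI and VIO bus types would splice the two chains together through that single next pointer, which is why the patch gives each bus type its own block.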
+ */ +static struct notifier_block fail_iommu_pci_bus_notifier = { + .notifier_call = fail_iommu_bus_notify +}; + +#ifdef CONFIG_IBMVIO +static struct notifier_block fail_iommu_vio_bus_notifier = { .notifier_call = fail_iommu_bus_notify }; +#endif static int __init fail_iommu_setup(void) { #ifdef CONFIG_PCI - bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier); #endif #ifdef CONFIG_IBMVIO - bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier); #endif return 0; @@ -519,7 +531,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; dma_addr = entry << tbl->it_page_shift; - dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl)); + dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl)); DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); @@ -632,7 +644,7 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, tbl->it_ops->flush(tbl); } -static void iommu_table_clear(struct iommu_table *tbl) +void iommu_table_clear(struct iommu_table *tbl) { /* * In case of firmware assisted dump system goes through clean @@ -673,10 +685,10 @@ static void iommu_table_clear(struct iommu_table *tbl) #endif } -static void iommu_table_reserve_pages(struct iommu_table *tbl, +void iommu_table_reserve_pages(struct iommu_table *tbl, unsigned long res_start, unsigned long res_end) { - int i; + unsigned long i; WARN_ON_ONCE(res_end < res_start); /* @@ -758,8 +770,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, iommu_table_clear(tbl); if (!welcomed) { - printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", - novmerge ? 
"disabled" : "enabled"); + pr_info("IOMMU table initialized, virtual merging %s\n", + str_disabled_enabled(novmerge)); welcomed = 1; } @@ -775,6 +787,11 @@ bool iommu_table_in_use(struct iommu_table *tbl) /* ignore reserved bit0 */ if (tbl->it_offset == 0) start = 1; + + /* Simple case with no reserved MMIO32 region */ + if (!tbl->it_reserved_start && !tbl->it_reserved_end) + return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size; + end = tbl->it_reserved_start - tbl->it_offset; if (find_next_bit(tbl->it_map, end, start) != end) return true; @@ -901,6 +918,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, unsigned int order; unsigned int nio_pages, io_order; struct page *page; + int tcesize = (1 << tbl->it_page_shift); size = PAGE_ALIGN(size); order = get_order(size); @@ -927,7 +945,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; + io_order = get_iommu_order(size, tbl); mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, mask >> tbl->it_page_shift, io_order, 0); @@ -935,7 +954,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl, free_pages((unsigned long)ret, order); return NULL; } - *dma_handle = mapping; + + *dma_handle = mapping | ((u64)ret & (tcesize - 1)); return ret; } @@ -946,7 +966,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size, unsigned int nio_pages; size = PAGE_ALIGN(size); - nio_pages = size >> tbl->it_page_shift; + nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift; iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); free_pages((unsigned long)vaddr, get_order(size)); @@ -969,6 +989,23 @@ unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir) EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); #ifdef CONFIG_IOMMU_API + +int dev_has_iommu_table(struct device *dev, void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev **ppdev = data; + + if (!dev) + return 0; + + if (device_iommu_mapped(dev)) { + *ppdev = pdev; + return 1; + } + + return 0; +} + /* * SPAPR TCE API */ @@ -1056,10 +1093,10 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) } EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); -extern long iommu_tce_xchg_no_kill(struct mm_struct *mm, - struct iommu_table *tbl, - unsigned long entry, unsigned long *hpa, - enum dma_data_direction *direction) +long iommu_tce_xchg_no_kill(struct mm_struct *mm, + struct iommu_table *tbl, + unsigned long entry, unsigned long *hpa, + enum dma_data_direction *direction) { long ret; unsigned long size = 0; @@ -1083,59 +1120,6 @@ void iommu_tce_kill(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_kill); -int iommu_take_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - int ret = 0; - - /* - * VFIO does not control TCE entries allocation and the guest - * can write new TCEs on top of existing ones so iommu_tce_build() - * must be able to release old pages. This functionality - * requires exchange() callback defined so if it is not - * implemented, we disallow taking ownership over the table. 
- */ - if (!tbl->it_ops->xchg_no_kill) - return -EINVAL; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - if (iommu_table_in_use(tbl)) { - pr_err("iommu_tce: it_map is not empty"); - ret = -EBUSY; - } else { - memset(tbl->it_map, 0xff, sz); - } - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - - return ret; -} -EXPORT_SYMBOL_GPL(iommu_take_ownership); - -void iommu_release_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - memset(tbl->it_map, 0, sz); - - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); -} -EXPORT_SYMBOL_GPL(iommu_release_ownership); - int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) { /* @@ -1155,25 +1139,179 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), iommu_group_id(table_group->group)); - - return iommu_group_add_device(table_group->group, dev); + /* + * This is still not adding devices via the IOMMU bus notifier because + * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls + * pcibios_scan_phb() first (and this guy adds devices and triggers + * the notifier) and only then it calls pci_bus_add_devices() which + * configures DMA for buses which also creates PEs and IOMMU groups. + */ + return iommu_probe_device(dev); } EXPORT_SYMBOL_GPL(iommu_add_device); -void iommu_del_device(struct device *dev) +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) +/* + * A simple iommu_ops to allow less cruft in generic VFIO code. + */ +static int +spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + struct iommu_table_group *table_group; + struct iommu_group *grp; + + /* At first attach the ownership is already set */ + if (!domain) + return 0; + + grp = iommu_group_get(dev); + table_group = iommu_group_get_iommudata(grp); /* - * Some devices might not have IOMMU table and group - * and we needn't detach them from the associated - * IOMMU groups + * The domain being set to PLATFORM from earlier + * BLOCKED. The table_group ownership has to be released. 
*/ - if (!device_iommu_mapped(dev)) { - pr_debug("iommu_tce: skipping device %s with no tbl\n", - dev_name(dev)); - return; + table_group->ops->release_ownership(table_group, dev); + iommu_group_put(grp); + + return 0; +} + +static const struct iommu_domain_ops spapr_tce_platform_domain_ops = { + .attach_dev = spapr_tce_platform_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_platform_domain = { + .type = IOMMU_DOMAIN_PLATFORM, + .ops = &spapr_tce_platform_domain_ops, +}; + +static int +spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain, + struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + int ret = -EINVAL; + + /* + * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain + * also sets the dma_api ops + */ + table_group = iommu_group_get_iommudata(grp); + ret = table_group->ops->take_ownership(table_group, dev); + iommu_group_put(grp); + + return ret; +} + +static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = { + .attach_dev = spapr_tce_blocked_iommu_attach_dev, +}; + +static struct iommu_domain spapr_tce_blocked_domain = { + .type = IOMMU_DOMAIN_BLOCKED, + .ops = &spapr_tce_blocked_domain_ops, +}; + +static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + break; } - iommu_group_remove_device(dev); + return false; } -EXPORT_SYMBOL_GPL(iommu_del_device); + +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) +{ + struct pci_dev *pdev; + struct pci_controller *hose; + + if (!dev_is_pci(dev)) + return ERR_PTR(-ENODEV); + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + return &hose->iommu; +} + +static void spapr_tce_iommu_release_device(struct device *dev) +{ +} + +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) +{ + struct pci_controller *hose; + struct pci_dev *pdev; + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + if (!hose->controller_ops.device_group) + return ERR_PTR(-ENOENT); + + return hose->controller_ops.device_group(hose, pdev); +} + +static const struct iommu_ops spapr_tce_iommu_ops = { + .default_domain = &spapr_tce_platform_domain, + .blocked_domain = &spapr_tce_blocked_domain, + .capable = spapr_tce_iommu_capable, + .probe_device = spapr_tce_iommu_probe_device, + .release_device = spapr_tce_iommu_release_device, + .device_group = spapr_tce_iommu_device_group, +}; + +static struct attribute *spapr_tce_iommu_attrs[] = { + NULL, +}; + +static struct attribute_group spapr_tce_iommu_group = { + .name = "spapr-tce-iommu", + .attrs = spapr_tce_iommu_attrs, +}; + +static const struct attribute_group *spapr_tce_iommu_groups[] = { + &spapr_tce_iommu_group, + NULL, +}; + +void ppc_iommu_register_device(struct pci_controller *phb) +{ + iommu_device_sysfs_add(&phb->iommu, phb->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + phb->global_number); + iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops, + phb->parent); +} + +void ppc_iommu_unregister_device(struct pci_controller *phb) +{ + iommu_device_unregister(&phb->iommu); + iommu_device_sysfs_remove(&phb->iommu); +} + +/* + * This registers IOMMU devices of PHBs. This needs to happen + * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and + * before subsys_initcall(iommu_subsys_init). 
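+ * + * With the standard initcall ordering that is: core_initcall(iommu_init), + * then postcore_initcall(pci_driver_init), then postcore_initcall_sync() + * (this initcall), then subsys_initcall(iommu_subsys_init).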
+ */ +static int __init spapr_tce_setup_phb_iommus_initcall(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + ppc_iommu_register_device(hose); + } + return 0; +} +postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall); +#endif + #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index dd09919c3c66..a0e8b998c9b5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -65,13 +65,8 @@ #include <asm/smp.h> #include <asm/hw_irq.h> #include <asm/softirq_stack.h> +#include <asm/ppc_asm.h> -#ifdef CONFIG_PPC64 -#include <asm/paca.h> -#include <asm/firmware.h> -#include <asm/lv1call.h> -#include <asm/dbell.h> -#endif #define CREATE_TRACE_POINTS #include <asm/trace.h> #include <asm/cpu_has_feature.h> @@ -88,480 +83,75 @@ u32 tau_interrupts(unsigned long cpu); #endif #endif /* CONFIG_PPC32 */ -#ifdef CONFIG_PPC64 - -int distribute_irqs = 1; - -static inline notrace unsigned long get_irq_happened(void) -{ - unsigned long happened; - - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened))); - - return happened; -} - -void replay_soft_interrupts(void) -{ - struct pt_regs regs; - - /* - * Be careful here, calling these interrupt handlers can cause - * softirqs to be raised, which they may run when calling irq_exit, - * which will cause local_irq_enable() to be run, which can then - * recurse into this function. Don't keep any state across - * interrupt handler calls which may change underneath us. - * - * We use local_paca rather than get_paca() to avoid all the - * debug_smp_processor_id() business in this low level function. - */ - - ppc_save_regs(&regs); - regs.softe = IRQS_ENABLED; - regs.msr |= MSR_EE; - -again: - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - - /* - * Force the delivery of pending soft-disabled interrupts on PS3. - * Any HV call will have this side effect. - */ - if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { - u64 tmp, tmp2; - lv1_get_version_info(&tmp, &tmp2); - } - - /* - * Check if a hypervisor Maintenance interrupt happened. - * This is a higher priority interrupt than the others, so - * replay it first.
- */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_HMI)) { - local_paca->irq_happened &= ~PACA_IRQ_HMI; - regs.trap = INTERRUPT_HMI; - handle_hmi_exception(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_DEC) { - local_paca->irq_happened &= ~PACA_IRQ_DEC; - regs.trap = INTERRUPT_DECREMENTER; - timer_interrupt(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & PACA_IRQ_EE) { - local_paca->irq_happened &= ~PACA_IRQ_EE; - regs.trap = INTERRUPT_EXTERNAL; - do_IRQ(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (IS_ENABLED(CONFIG_PPC_DOORBELL) && (local_paca->irq_happened & PACA_IRQ_DBELL)) { - local_paca->irq_happened &= ~PACA_IRQ_DBELL; - regs.trap = INTERRUPT_DOORBELL; - doorbell_exception(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - /* Book3E does not support soft-masking PMI interrupts */ - if (IS_ENABLED(CONFIG_PPC_BOOK3S) && (local_paca->irq_happened & PACA_IRQ_PMI)) { - local_paca->irq_happened &= ~PACA_IRQ_PMI; - regs.trap = INTERRUPT_PERFMON; - performance_monitor_exception(&regs); - if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) - hard_irq_disable(); - } - - if (local_paca->irq_happened & ~PACA_IRQ_HARD_DIS) { - /* - * We are responding to the next interrupt, so interrupt-off - * latencies should be reset here. - */ - trace_hardirqs_on(); - trace_hardirqs_off(); - goto again; - } -} - -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) -static inline void replay_soft_interrupts_irqrestore(void) -{ - unsigned long kuap_state = get_kuap(); - - /* - * Check if anything calls local_irq_enable/restore() when KUAP is - * disabled (user access enabled). We handle that case here by saving - * and re-locking AMR but we shouldn't get here in the first place, - * hence the warning. - */ - kuap_assert_locked(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(AMR_KUAP_BLOCKED); - - replay_soft_interrupts(); - - if (kuap_state != AMR_KUAP_BLOCKED) - set_kuap(kuap_state); -} -#else -#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() -#endif - -notrace void arch_local_irq_restore(unsigned long mask) -{ - unsigned char irq_happened; - - /* Write the new soft-enabled value if it is a disable */ - if (mask) { - irq_soft_mask_set(mask); - return; - } - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(in_nmi() || in_hardirq()); - - /* - * After the stb, interrupts are unmasked and there are no interrupts - * pending replay. The restart sequence makes this atomic with - * respect to soft-masked interrupts. If this was just a simple code - * sequence, a soft-masked interrupt could become pending right after - * the comparison and before the stb. - * - * This allows interrupts to be unmasked without hard disabling, and - * also without new hard interrupts coming in ahead of pending ones.
- */ - asm_volatile_goto( -"1: \n" -" lbz 9,%0(13) \n" -" cmpwi 9,0 \n" -" bne %l[happened] \n" -" stb 9,%1(13) \n" -"2: \n" - RESTART_TABLE(1b, 2b, 1b) - : : "i" (offsetof(struct paca_struct, irq_happened)), - "i" (offsetof(struct paca_struct, irq_soft_mask)) - : "cr0", "r9" - : happened); - - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!(mfmsr() & MSR_EE)); - - return; - -happened: - irq_happened = get_irq_happened(); - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(!irq_happened); - - if (irq_happened == PACA_IRQ_HARD_DIS) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) - WARN_ON_ONCE(mfmsr() & MSR_EE); - irq_soft_mask_set(IRQS_ENABLED); - local_paca->irq_happened = 0; - __hard_irq_enable(); - return; - } - - /* Have interrupts to replay, need to hard disable first */ - if (!(irq_happened & PACA_IRQ_HARD_DIS)) { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (!(mfmsr() & MSR_EE)) { - /* - * An interrupt could have come in and cleared - * MSR[EE] and set IRQ_HARD_DIS, so check - * IRQ_HARD_DIS again and warn if it is still - * clear. - */ - irq_happened = get_irq_happened(); - WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); - } - } - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - } else { - if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { - if (WARN_ON_ONCE(mfmsr() & MSR_EE)) - __hard_irq_disable(); - } - } - - /* - * Disable preempt here, so that the below preempt_enable will - * perform resched if required (a replayed interrupt may set - * need_resched). - */ - preempt_disable(); - irq_soft_mask_set(IRQS_ALL_DISABLED); - trace_hardirqs_off(); - - replay_soft_interrupts_irqrestore(); - local_paca->irq_happened = 0; - - trace_hardirqs_on(); - irq_soft_mask_set(IRQS_ENABLED); - __hard_irq_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(arch_local_irq_restore); - -/* - * This is a helper to use when about to go into idle low-power - * when the latter has the side effect of re-enabling interrupts - * (such as calling H_CEDE under pHyp). - * - * You call this function with interrupts soft-disabled (this is - * already the case when ppc_md.power_save is called). The function - * will return whether to enter power save or just return. - * - * In the former case, it will have notified lockdep of interrupts - * being re-enabled and generally sanitized the lazy irq state, - * and in the latter case it will leave with interrupts hard - * disabled and marked as such, so the local_irq_enable() call - * in arch_cpu_idle() will properly re-enable everything. - */ -bool prep_irq_for_idle(void) -{ - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - /* - * Mark interrupts as soft-enabled and clear the - * PACA_IRQ_HARD_DIS from the pending mask since we - * are about to hard enable as well as a side effect - * of entering the low power state. - */ - local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - irq_soft_mask_set(IRQS_ENABLED); - - /* Tell the caller to enter the low power state */ - return true; -} - -#ifdef CONFIG_PPC_BOOK3S -/* - * This is for idle sequences that return with IRQs off, but the - * idle state itself wakes on interrupt. 
Tell the irq tracer that - * IRQs are enabled for the duration of idle so it does not get long - * off times. Must be paired with fini_irq_for_idle_irqsoff. - */ -bool prep_irq_for_idle_irqsoff(void) -{ - WARN_ON(!irqs_disabled()); - - /* - * First we need to hard disable to ensure no interrupt - * occurs before we effectively enter the low power state - */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* - * If anything happened while we were soft-disabled, - * we return now and do not enter the low power state. - */ - if (lazy_irq_pending()) - return false; - - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - - return true; -} - -/* - * Take the SRR1 wakeup reason, index into this table to find the - * appropriate irq_happened bit. - * - * Sytem reset exceptions taken in idle state also come through here, - * but they are NMI interrupts so do not need to wait for IRQs to be - * restored, and should be taken as early as practical. These are marked - * with 0xff in the table. The Power ISA specifies 0100b as the system - * reset interrupt reason. - */ -#define IRQ_SYSTEM_RESET 0xff - -static const u8 srr1_to_lazyirq[0x10] = { - 0, 0, 0, - PACA_IRQ_DBELL, - IRQ_SYSTEM_RESET, - PACA_IRQ_DBELL, - PACA_IRQ_DEC, - 0, - PACA_IRQ_EE, - PACA_IRQ_EE, - PACA_IRQ_HMI, - 0, 0, 0, 0, 0 }; - -void replay_system_reset(void) -{ - struct pt_regs regs; - - ppc_save_regs(®s); - regs.trap = 0x100; - get_paca()->in_nmi = 1; - system_reset_exception(®s); - get_paca()->in_nmi = 0; -} -EXPORT_SYMBOL_GPL(replay_system_reset); - -void irq_set_pending_from_srr1(unsigned long srr1) -{ - unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; - u8 reason = srr1_to_lazyirq[idx]; - - /* - * Take the system reset now, which is immediately after registers - * are restored from idle. It's an NMI, so interrupts need not be - * re-enabled before it is taken. - */ - if (unlikely(reason == IRQ_SYSTEM_RESET)) { - replay_system_reset(); - return; - } - - if (reason == PACA_IRQ_DBELL) { - /* - * When doorbell triggers a system reset wakeup, the message - * is not cleared, so if the doorbell interrupt is replayed - * and the IPI handled, the doorbell interrupt would still - * fire when EE is enabled. - * - * To avoid taking the superfluous doorbell interrupt, - * execute a msgclr here before the interrupt is replayed. - */ - ppc_msgclr(PPC_DBELL_MSGTYPE); - } - - /* - * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, - * so this can be called unconditionally with the SRR1 wake - * reason as returned by the idle code, which uses 0 to mean no - * interrupt. - * - * If a future CPU was to designate this as an interrupt reason, - * then a new index for no interrupt must be assigned. - */ - local_paca->irq_happened |= reason; -} -#endif /* CONFIG_PPC_BOOK3S */ - -/* - * Force a replay of the external interrupt handler on this CPU. - */ -void force_external_irq_replay(void) -{ - /* - * This must only be called with interrupts soft-disabled, - * the replay will happen when re-enabling. - */ - WARN_ON(!arch_irqs_disabled()); - - /* - * Interrupts must always be hard disabled before irq_happened is - * modified (to prevent lost update in case of interrupt between - * load and store). 
- */ - __hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; - - /* Indicate in the PACA that we have an interrupt to replay */ - local_paca->irq_happened |= PACA_IRQ_EE; -} - -#endif /* CONFIG_PPC64 */ - int arch_show_interrupts(struct seq_file *p, int prec) { int j; #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized) { - seq_printf(p, "%*s: ", prec, "TAU"); + seq_printf(p, "%*s:", prec, "TAU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); + seq_put_decimal_ull_width(p, " ", tau_interrupts(j), 10); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10); seq_printf(p, " Local timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "BCT"); + seq_printf(p, "%*s:", prec, "BCT"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10); seq_printf(p, " Broadcast timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10); seq_printf(p, " Local timer interrupts for others\n"); - seq_printf(p, "%*s: ", prec, "SPU"); + seq_printf(p, "%*s:", prec, "SPU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "PMI"); + seq_printf(p, "%*s:", prec, "PMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "MCE"); + seq_printf(p, "%*s:", prec, "MCE"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10); seq_printf(p, " Machine check exceptions\n"); #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { - seq_printf(p, "%*s: ", prec, "HMI"); + seq_printf(p, "%*s:", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); + seq_put_decimal_ull_width(p, " ", paca_ptrs[j]->hmi_irqs, 10); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } #endif - seq_printf(p, "%*s: ", prec, "NMI"); + seq_printf(p, "%*s:", prec, "NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10); seq_printf(p, " System Reset interrupts\n"); #ifdef CONFIG_PPC_WATCHDOG - seq_printf(p, "%*s: ", prec, "WDG"); + seq_printf(p, "%*s:", prec, "WDG"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10); seq_printf(p, " Watchdog soft-NMI interrupts\n"); #endif #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { - seq_printf(p, "%*s: ", prec, "DBL"); + seq_printf(p, "%*s:", prec, "DBL"); 
for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10); seq_printf(p, " Doorbell interrupts\n"); } #endif @@ -595,33 +185,36 @@ u64 arch_irq_stat_cpu(unsigned int cpu) return sum; } -static inline void check_stack_overflow(void) +static inline void check_stack_overflow(unsigned long sp) { - long sp; - if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW)) return; - sp = current_stack_pointer & (THREAD_SIZE - 1); + sp &= THREAD_SIZE - 1; - /* check for stack overflow: is there less than 2KB free? */ - if (unlikely(sp < 2048)) { + /* check for stack overflow: is there less than 1/4th free? */ + if (unlikely(sp < THREAD_SIZE / 4)) { pr_err("do_IRQ: stack overflow: %ld\n", sp); dump_stack(); } } +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK static __always_inline void call_do_softirq(const void *sp) { /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */ asm volatile ( PPC_STLU " %%r1, %[offset](%[sp]) ;" "mr %%r1, %[sp] ;" +#ifdef CONFIG_PPC_KERNEL_PCREL + "bl %[callee]@notoc ;" +#else "bl %[callee] ;" +#endif PPC_LL " %%r1, 0(%%r1) ;" : // Outputs : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), [callee] "i" (__do_softirq) : // Clobbers "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", @@ -629,37 +222,18 @@ static __always_inline void call_do_softirq(const void *sp) "r11", "r12" ); } - -static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) -{ - register unsigned long r3 asm("r3") = (unsigned long)regs; - - /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */ - asm volatile ( - PPC_STLU " %%r1, %[offset](%[sp]) ;" - "mr %%r1, %[sp] ;" - "bl %[callee] ;" - PPC_LL " %%r1, 0(%%r1) ;" - : // Outputs - "+r" (r3) - : // Inputs - [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD), - [callee] "i" (__do_irq) - : // Clobbers - "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", - "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", - "r11", "r12" - ); -} +#endif DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq); -void __do_irq(struct pt_regs *regs) +static void __do_irq(struct pt_regs *regs, unsigned long oldsp) { unsigned int irq; trace_irq_entry(regs); + check_stack_overflow(oldsp); + /* * Query the platform PIC for the interrupt & ack it. * @@ -668,7 +242,7 @@ void __do_irq(struct pt_regs *regs) irq = static_call(ppc_get_irq)(); /* We can hard enable interrupts now to allow perf interrupts */ - if (should_hard_irq_enable()) + if (should_hard_irq_enable(regs)) do_hard_irq_enable(); /* And finally process it */ @@ -680,26 +254,47 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } +static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) +{ + register unsigned long r3 asm("r3") = (unsigned long)regs; + + /* Temporarily switch r1 to sp, call __do_irq() then restore r1. 
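+ * PPC_STLU stores the old r1 at sp + offset and updates the register holding + * sp, so word 0 of the new frame is a normal back-chain to the old stack and + * the trailing PPC_LL restores r1 from it; the old stack pointer is also + * copied to r4, becoming the oldsp argument that __do_irq() passes to + * check_stack_overflow().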
*/ + asm volatile ( + PPC_STLU " %%r1, %[offset](%[sp]) ;" + "mr %%r4, %%r1 ;" + "mr %%r1, %[sp] ;" +#ifdef CONFIG_PPC_KERNEL_PCREL + "bl %[callee]@notoc ;" +#else + "bl %[callee] ;" +#endif + PPC_LL " %%r1, 0(%%r1) ;" + : // Outputs + "+r" (r3) + : // Inputs + [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE), + [callee] "i" (__do_irq) + : // Clobbers + "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", + "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12" + ); +} + void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); - void *cursp, *irqsp, *sirqsp; + void *cursp, *irqsp; /* Switch to the irq stack to handle this */ cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); irqsp = hardirq_ctx[raw_smp_processor_id()]; - sirqsp = softirq_ctx[raw_smp_processor_id()]; - - check_stack_overflow(); - /* Already there ? */ - if (unlikely(cursp == irqsp || cursp == sirqsp)) { - __do_irq(regs); - set_irq_regs(old_regs); - return; - } - /* Switch stack and call */ - call_do_irq(regs, irqsp); + /* Already there ? If not switch stack and call */ + if (unlikely(cursp == irqsp)) + __do_irq(regs, current_stack_pointer); + else + call_do_irq(regs, irqsp); set_irq_regs(old_regs); } @@ -738,7 +333,7 @@ void __init init_IRQ(void) static_call_update(ppc_get_irq, ppc_md.get_irq); } -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void *critirq_ctx[NR_CPUS] __read_mostly; void *dbgirq_ctx[NR_CPUS] __read_mostly; void *mcheckirq_ctx[NR_CPUS] __read_mostly; @@ -747,10 +342,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_mostly; void *softirq_ctx[NR_CPUS] __read_mostly; void *hardirq_ctx[NR_CPUS] __read_mostly; +#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK void do_softirq_own_stack(void) { call_do_softirq(softirq_ctx[smp_processor_id()]); } +#endif irq_hw_number_t virq_to_hw(unsigned int virq) { @@ -794,13 +391,3 @@ int irq_choose_cpu(const struct cpumask *mask) return hard_smp_processor_id(); } #endif - -#ifdef CONFIG_PPC64 -static int __init setup_noirqdistrib(char *str) -{ - distribute_irqs = 0; - return 1; -} - -__setup("noirqdistrib", setup_noirqdistrib); -#endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c new file mode 100644 index 000000000000..d5c48d1b0a31 --- /dev/null +++ b/arch/powerpc/kernel/irq_64.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Derived from arch/i386/kernel/irq.c + * Copyright (C) 1992 Linus Torvalds + * Adapted from arch/i386 by Gary Thomas + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Updated and modified by Cort Dougan <cort@fsmlabs.com> + * Copyright (C) 1996-2001 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQ's should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. 
+ */ + +#undef DEBUG + +#include <linux/export.h> +#include <linux/threads.h> +#include <linux/kernel_stat.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/ptrace.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/timex.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/cpumask.h> +#include <linux/profile.h> +#include <linux/bitops.h> +#include <linux/list.h> +#include <linux/radix-tree.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/debugfs.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/vmalloc.h> +#include <linux/pgtable.h> +#include <linux/static_call.h> + +#include <linux/uaccess.h> +#include <asm/interrupt.h> +#include <asm/io.h> +#include <asm/irq.h> +#include <asm/cache.h> +#include <asm/ptrace.h> +#include <asm/machdep.h> +#include <asm/udbg.h> +#include <asm/smp.h> +#include <asm/hw_irq.h> +#include <asm/softirq_stack.h> +#include <asm/ppc_asm.h> + +#include <asm/paca.h> +#include <asm/firmware.h> +#include <asm/lv1call.h> +#include <asm/dbell.h> +#include <asm/trace.h> +#include <asm/cpu_has_feature.h> + +int distribute_irqs = 1; + +static inline void next_interrupt(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + } + + /* + * We are responding to the next interrupt, so interrupt-off + * latencies should be reset here. + */ + lockdep_hardirq_exit(); + trace_hardirqs_on(); + trace_hardirqs_off(); + lockdep_hardirq_enter(); +} + +static inline bool irq_happened_test_and_clear(u8 irq) +{ + if (local_paca->irq_happened & irq) { + local_paca->irq_happened &= ~irq; + return true; + } + return false; +} + +static __no_kcsan void __replay_soft_interrupts(void) +{ + struct pt_regs regs; + + /* + * We use local_paca rather than get_paca() to avoid all the + * debug_smp_processor_id() business in this low level function. + */ + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON_ONCE(mfmsr() & MSR_EE); + WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)); + WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING); + } + + /* + * PACA_IRQ_REPLAYING prevents interrupt handlers from enabling + * MSR[EE] to get PMIs, which can result in more IRQs becoming + * pending. + */ + local_paca->irq_happened |= PACA_IRQ_REPLAYING; + + ppc_save_regs(&regs); + regs.softe = IRQS_ENABLED; + regs.msr |= MSR_EE; + + /* + * Force the delivery of pending soft-disabled interrupts on PS3. + * Any HV call will have this side effect. + */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) { + u64 tmp, tmp2; + lv1_get_version_info(&tmp, &tmp2); + } + + /* + * Check if a hypervisor Maintenance interrupt happened. + * This is a higher priority interrupt than the others, so + * replay it first.
+ */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && + irq_happened_test_and_clear(PACA_IRQ_HMI)) { + regs.trap = INTERRUPT_HMI; + handle_hmi_exception(&regs); + next_interrupt(&regs); + } + + if (irq_happened_test_and_clear(PACA_IRQ_DEC)) { + regs.trap = INTERRUPT_DECREMENTER; + timer_interrupt(&regs); + next_interrupt(&regs); + } + + if (irq_happened_test_and_clear(PACA_IRQ_EE)) { + regs.trap = INTERRUPT_EXTERNAL; + do_IRQ(&regs); + next_interrupt(&regs); + } + + if (IS_ENABLED(CONFIG_PPC_DOORBELL) && + irq_happened_test_and_clear(PACA_IRQ_DBELL)) { + regs.trap = INTERRUPT_DOORBELL; + doorbell_exception(&regs); + next_interrupt(&regs); + } + + /* Book3E does not support soft-masking PMI interrupts */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S) && + irq_happened_test_and_clear(PACA_IRQ_PMI)) { + regs.trap = INTERRUPT_PERFMON; + performance_monitor_exception(&regs); + next_interrupt(&regs); + } + + local_paca->irq_happened &= ~PACA_IRQ_REPLAYING; +} + +__no_kcsan void replay_soft_interrupts(void) +{ + irq_enter(); /* See comment in arch_local_irq_restore */ + __replay_soft_interrupts(); + irq_exit(); +} + +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline __no_kcsan void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. + */ + kuap_assert_locked(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + __replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() __replay_soft_interrupts() +#endif + +notrace __no_kcsan void arch_local_irq_restore(unsigned long mask) +{ + unsigned char irq_happened; + + /* Write the new soft-enabled value if it is a disable */ + if (mask) { + irq_soft_mask_set(mask); + return; + } + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + WARN_ON_ONCE(in_nmi()); + WARN_ON_ONCE(in_hardirq()); + WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING); + } + +again: + /* + * After the stb, interrupts are unmasked and there are no interrupts + * pending replay. The restart sequence makes this atomic with + * respect to soft-masked interrupts. If this was just a simple code + * sequence, a soft-masked interrupt could become pending right after + * the comparison and before the stb. + * + * This allows interrupts to be unmasked without hard disabling, and + * also without new hard interrupts coming in ahead of pending ones. + */ + asm goto( +"1: \n" +" lbz 9,%0(13) \n" +" cmpwi 9,0 \n" +" bne %l[happened] \n" +" stb 9,%1(13) \n" +"2: \n" + RESTART_TABLE(1b, 2b, 1b) + : : "i" (offsetof(struct paca_struct, irq_happened)), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "cr0", "r9" + : happened); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!(mfmsr() & MSR_EE)); + + /* + * If we came here from the replay below, we might have a preempt + * pending (due to preempt_enable_no_resched()). Have to check now.
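+ * (preempt_check_resched() is a no-op on kernels without CONFIG_PREEMPTION.)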
+ */ + preempt_check_resched(); + + return; + +happened: + irq_happened = READ_ONCE(local_paca->irq_happened); + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(!irq_happened); + + if (irq_happened == PACA_IRQ_HARD_DIS) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + preempt_check_resched(); + return; + } + + /* Have interrupts to replay, need to hard disable first */ + if (!(irq_happened & PACA_IRQ_HARD_DIS)) { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (!(mfmsr() & MSR_EE)) { + /* + * An interrupt could have come in and cleared + * MSR[EE] and set IRQ_HARD_DIS, so check + * IRQ_HARD_DIS again and warn if it is still + * clear. + */ + irq_happened = READ_ONCE(local_paca->irq_happened); + WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS)); + } + } + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + } else { + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) { + if (WARN_ON_ONCE(mfmsr() & MSR_EE)) + __hard_irq_disable(); + } + } + + /* + * Disable preempt here, so that the below preempt_enable will + * perform resched if required (a replayed interrupt may set + * need_resched). + */ + preempt_disable(); + irq_soft_mask_set(IRQS_ALL_DISABLED); + trace_hardirqs_off(); + + /* + * Now enter interrupt context. The interrupt handlers themselves + * also call irq_enter/exit (which is okay, they can nest). But call + * it here now to hold off softirqs until the below irq_exit(). If + * we allowed replayed handlers to run softirqs, that enables irqs, + * which must replay interrupts, which recurses in here and makes + * things more complicated. The recursion is limited to 2, and it can + * be made to work, but it's complicated. + * + * local_bh_disable can not be used here because interrupts taken in + * idle are not in the right context (RCU, tick, etc) to run softirqs + * so irq_enter must be called. + */ + irq_enter(); + + replay_soft_interrupts_irqrestore(); + + irq_exit(); + + if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) { + /* + * The softirq processing in irq_exit() may enable interrupts + * temporarily, which can result in MSR[EE] being enabled and + * more irqs becoming pending. Go around again if that happens. + */ + trace_hardirqs_on(); + preempt_enable_no_resched(); + goto again; + } + + trace_hardirqs_on(); + irq_soft_mask_set(IRQS_ENABLED); + local_paca->irq_happened = 0; + __hard_irq_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(arch_local_irq_restore); + +/* + * This is a helper to use when about to go into idle low-power + * when the latter has the side effect of re-enabling interrupts + * (such as calling H_CEDE under pHyp). + * + * You call this function with interrupts soft-disabled (this is + * already the case when ppc_md.power_save is called). The function + * will return whether to enter power save or just return. + * + * In the former case, it will have generally sanitized the lazy irq + * state, and in the latter case it will leave with interrupts hard + * disabled and marked as such, so the local_irq_enable() call + * in arch_cpu_idle() will properly re-enable everything. 
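+ * + * A typical power_save hook is therefore shaped like this (sketch only, + * with the pHyp case as the example): + * + * if (!prep_irq_for_idle()) + * return; (something is pending, stay awake) + * cede_processor(); (e.g. H_CEDE, which hard-enables as a side effect)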
+ */ +__cpuidle bool prep_irq_for_idle(void) +{ + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* + * Mark interrupts as soft-enabled and clear the + * PACA_IRQ_HARD_DIS from the pending mask since we + * are about to hard enable as well as a side effect + * of entering the low power state. + */ + local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; + irq_soft_mask_set(IRQS_ENABLED); + + /* Tell the caller to enter the low power state */ + return true; +} + +#ifdef CONFIG_PPC_BOOK3S +/* + * This is for idle sequences that return with IRQs off, but the + * idle state itself wakes on interrupt. Tell the irq tracer that + * IRQs are enabled for the duration of idle so it does not get long + * off times. Must be paired with fini_irq_for_idle_irqsoff. + */ +bool prep_irq_for_idle_irqsoff(void) +{ + WARN_ON(!irqs_disabled()); + + /* + * First we need to hard disable to ensure no interrupt + * occurs before we effectively enter the low power state + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* + * If anything happened while we were soft-disabled, + * we return now and do not enter the low power state. + */ + if (lazy_irq_pending()) + return false; + + /* Tell lockdep we are about to re-enable */ + trace_hardirqs_on(); + + return true; +} + +/* + * Take the SRR1 wakeup reason, index into this table to find the + * appropriate irq_happened bit. + * + * System reset exceptions taken in idle state also come through here, + * but they are NMI interrupts so do not need to wait for IRQs to be + * restored, and should be taken as early as practical. These are marked + * with 0xff in the table. The Power ISA specifies 0100b as the system + * reset interrupt reason. + */ +#define IRQ_SYSTEM_RESET 0xff + +static const u8 srr1_to_lazyirq[0x10] = { + 0, 0, 0, + PACA_IRQ_DBELL, + IRQ_SYSTEM_RESET, + PACA_IRQ_DBELL, + PACA_IRQ_DEC, + 0, + PACA_IRQ_EE, + PACA_IRQ_EE, + PACA_IRQ_HMI, + 0, 0, 0, 0, 0 }; + +void replay_system_reset(void) +{ + struct pt_regs regs; + + ppc_save_regs(&regs); + regs.trap = 0x100; + get_paca()->in_nmi = 1; + system_reset_exception(&regs); + get_paca()->in_nmi = 0; +} +EXPORT_SYMBOL_GPL(replay_system_reset); + +void irq_set_pending_from_srr1(unsigned long srr1) +{ + unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18; + u8 reason = srr1_to_lazyirq[idx]; + + /* + * Take the system reset now, which is immediately after registers + * are restored from idle. It's an NMI, so interrupts need not be + * re-enabled before it is taken. + */ + if (unlikely(reason == IRQ_SYSTEM_RESET)) { + replay_system_reset(); + return; + } + + if (reason == PACA_IRQ_DBELL) { + /* + * When doorbell triggers a system reset wakeup, the message + * is not cleared, so if the doorbell interrupt is replayed + * and the IPI handled, the doorbell interrupt would still + * fire when EE is enabled. + * + * To avoid taking the superfluous doorbell interrupt, + * execute a msgclr here before the interrupt is replayed. + */ + ppc_msgclr(PPC_DBELL_MSGTYPE); + } + + /* + * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, + * so this can be called unconditionally with the SRR1 wake + * reason as returned by the idle code, which uses 0 to mean no + * interrupt.
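+ * + * Worked example: a decrementer wakeup has SRR1[42:45] = 0b0110, so idx + * is 6 and srr1_to_lazyirq[6] = PACA_IRQ_DEC is ORed into irq_happened.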
+ * + * If a future CPU was to designate this as an interrupt reason, + * then a new index for no interrupt must be assigned. + */ + local_paca->irq_happened |= reason; +} +#endif /* CONFIG_PPC_BOOK3S */ + +/* + * Force a replay of the external interrupt handler on this CPU. + */ +void force_external_irq_replay(void) +{ + /* + * This must only be called with interrupts soft-disabled, + * the replay will happen when re-enabling. + */ + WARN_ON(!arch_irqs_disabled()); + + /* + * Interrupts must always be hard disabled before irq_happened is + * modified (to prevent lost update in case of interrupt between + * load and store). + */ + __hard_irq_disable(); + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; + + /* Indicate in the PACA that we have an interrupt to replay */ + local_paca->irq_happened |= PACA_IRQ_EE; +} + +static int __init setup_noirqdistrib(char *str) +{ + distribute_irqs = 0; + return 1; +} + +__setup("noirqdistrib", setup_noirqdistrib); diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index dc746611ebc0..5c064485197a 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -46,8 +46,8 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size) WARN_ON_ONCE(size & ~PAGE_MASK); if (slab_is_available()) { - if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa, - pgprot_noncached(PAGE_KERNEL))) + if (vmap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa, + pgprot_noncached(PAGE_KERNEL))) vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size); } else { early_ioremap_range(ISA_IO_BASE, pa, size, @@ -55,80 +55,50 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size) } } -static void pci_process_ISA_OF_ranges(struct device_node *isa_node, - unsigned long phb_io_base_phys) +static int process_ISA_OF_ranges(struct device_node *isa_node, + unsigned long phb_io_base_phys) { - /* We should get some saner parsing here and remove these structs */ - struct pci_address { - u32 a_hi; - u32 a_mid; - u32 a_lo; - }; - - struct isa_address { - u32 a_hi; - u32 a_lo; - }; - - struct isa_range { - struct isa_address isa_addr; - struct pci_address pci_addr; - unsigned int size; - }; - - const struct isa_range *range; - unsigned long pci_addr; - unsigned int isa_addr; unsigned int size; - int rlen = 0; + struct of_range_parser parser; + struct of_range range; - range = of_get_property(isa_node, "ranges", &rlen); - if (range == NULL || (rlen < sizeof(struct isa_range))) + if (of_range_parser_init(&parser, isa_node)) goto inval_range; - /* From "ISA Binding to 1275" - * The ranges property is laid out as an array of elements, - * each of which comprises: - * cells 0 - 1: an ISA address - * cells 2 - 4: a PCI address - * (size depending on dev->n_addr_cells) - * cell 5: the size of the range - */ - if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) { - range++; - rlen -= sizeof(struct isa_range); - if (rlen < sizeof(struct isa_range)) - goto inval_range; - } - if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) - goto inval_range; + for_each_of_range(&parser, &range) { + if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO) + continue; - isa_addr = range->isa_addr.a_lo; - pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | - range->pci_addr.a_lo; + if (range.cpu_addr == OF_BAD_ADDR) { + pr_err("ISA: Bad CPU mapping: %s\n", __func__); + return -EINVAL; + } - /* Assume these are both zero. Note: We could fix that and - * do a proper parsing instead ... 
oh well, that will do for - * now as nobody uses fancy mappings for ISA bridges - */ - if ((pci_addr != 0) || (isa_addr != 0)) { - printk(KERN_ERR "unexpected isa to pci mapping: %s\n", - __func__); - return; - } + /* We need page alignment */ + if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) { + pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node); + return -EINVAL; + } - /* Align size and make sure it's cropped to 64K */ - size = PAGE_ALIGN(range->size); - if (size > 0x10000) - size = 0x10000; + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(range.size); + if (size > 0x10000) + size = 0x10000; - remap_isa_base(phb_io_base_phys, size); - return; + if (!phb_io_base_phys) + phb_io_base_phys = range.cpu_addr; + + remap_isa_base(phb_io_base_phys, size); + return 0; + } inval_range: - printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " - "mapping 64k\n"); - remap_isa_base(phb_io_base_phys, 0x10000); + if (phb_io_base_phys) { + pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n"); + remap_isa_base(phb_io_base_phys, 0x10000); + return 0; + } + return -EINVAL; } @@ -170,7 +140,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose) isa_bridge_devnode = np; /* Now parse the "ranges" property and setup the ISA mapping */ - pci_process_ISA_OF_ranges(np, hose->io_base_phys); + process_ISA_OF_ranges(np, hose->io_base_phys); /* Set the global ISA io base to indicate we have an ISA bridge */ isa_io_base = ISA_IO_BASE; @@ -186,75 +156,15 @@ void __init isa_bridge_find_early(struct pci_controller *hose) */ void __init isa_bridge_init_non_pci(struct device_node *np) { - const __be32 *ranges, *pbasep = NULL; - int rlen, i, rs; - u32 na, ns, pna; - u64 cbase, pbase, size = 0; + int ret; /* If we already have an ISA bridge, bail off */ if (isa_bridge_devnode != NULL) return; - pna = of_n_addr_cells(np); - if (of_property_read_u32(np, "#address-cells", &na) || - of_property_read_u32(np, "#size-cells", &ns)) { - pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n", - np); - return; - } - - /* Check it's a supported address format */ - if (na != 2 || ns != 1) { - pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n", - np); - return; - } - rs = na + ns + pna; - - /* Grab the ranges property */ - ranges = of_get_property(np, "ranges", &rlen); - if (ranges == NULL || rlen < rs) { - pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n", - np); - return; - } - - /* Parse it. We are only looking for IO space */ - for (i = 0; (i + rs - 1) < rlen; i += rs) { - if (be32_to_cpup(ranges + i) != 1) - continue; - cbase = be32_to_cpup(ranges + i + 1); - size = of_read_number(ranges + i + na + pna, ns); - pbasep = ranges + i + na; - break; - } - - /* Got something ? 
*/ - if (!size || !pbasep) { - pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n", - np); + ret = process_ISA_OF_ranges(np, 0); + if (ret) return; - } - - /* Align size and make sure it's cropped to 64K */ - size = PAGE_ALIGN(size); - if (size > 0x10000) - size = 0x10000; - - /* Map pbase */ - pbase = of_translate_address(np, pbasep); - if (pbase == OF_BAD_ADDR) { - pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n", - np); - return; - } - - /* We need page alignment */ - if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { - pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n", - np); - return; - } /* Got it */ isa_bridge_devnode = np; @@ -263,7 +173,6 @@ void __init isa_bridge_init_non_pci(struct device_node *np) * and map it */ isa_io_base = ISA_IO_BASE; - remap_isa_base(pbase, size); pr_debug("ISA: Non-PCI bridge is %pOF\n", np); } @@ -282,7 +191,7 @@ static void isa_bridge_find_late(struct pci_dev *pdev, isa_bridge_pcidev = pdev; /* Now parse the "ranges" property and setup the ISA mapping */ - pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); + process_ISA_OF_ranges(devnode, hose->io_base_phys); /* Set the global ISA io base to indicate we have an ISA bridge */ isa_io_base = ISA_IO_BASE; diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 5277cf582c16..2659e1ac8604 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -5,7 +5,7 @@ #include <linux/kernel.h> #include <linux/jump_label.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 9f8d0fa7b718..5081334b7bd2 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * PowerPC backend to the KGDB stub. * @@ -8,10 +9,6 @@ * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and * Sergei Shtylyov <sshtylyov@ru.mvista.com> * Copyright (C) 2007-2008 Wind River Systems, Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program as licensed "as is" without any warranty of any - * kind, whether express or implied. */ #include <linux/kernel.h> @@ -24,7 +21,7 @@ #include <asm/processor.h> #include <asm/machdep.h> #include <asm/debug.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/slab.h> #include <asm/inst.h> @@ -48,9 +45,9 @@ static struct hard_trap_info { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ -#if defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_PPC_85xx) { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ @@ -60,14 +57,14 @@ static struct hard_trap_info { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ -#else /* ! CONFIG_FSL_BOOKE */ +#else /* ! 
CONFIG_PPC_85xx */ { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ #endif -#else /* !CONFIG_BOOKE_OR_40x */ +#else /* !CONFIG_BOOKE */ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ #if defined(CONFIG_PPC_8xx) { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ @@ -194,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs) void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) { struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + - STACK_FRAME_OVERHEAD); + STACK_INT_FRAME_REGS); unsigned long *ptr = gdb_regs; int reg; @@ -211,7 +208,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) for (reg = 14; reg < 32; reg++) PACK64(ptr, regs->gpr[reg]); -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx #ifdef CONFIG_SPE for (reg = 0; reg < 32; reg++) PACK64(ptr, p->thread.evr[reg]); @@ -237,7 +234,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) #define GDB_SIZEOF_REG sizeof(unsigned long) #define GDB_SIZEOF_REG_U32 sizeof(u32) -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx #define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long) #else #define GDB_SIZEOF_FLOAT_REG sizeof(u64) @@ -332,7 +329,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) if (regno >= 32 && regno < 64) { /* FP registers 32 -> 63 */ -#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) +#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE) if (current) memcpy(mem, &current->thread.evr[regno-32], dbg_reg_def[regno].size); @@ -358,7 +355,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) if (regno >= 32 && regno < 64) { /* FP registers 32 -> 63 */ -#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE) +#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE) memcpy(&current->thread.evr[regno-32], mem, dbg_reg_def[regno].size); #else diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 072ebe7f290b..f8208c027148 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct pt_regs *regs; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 1c97c0f177ae..c0d9f12cb441 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,13 +19,13 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> -#include <linux/moduleloader.h> -#include <asm/code-patching.h> +#include <linux/set_memory.h> +#include <linux/execmem.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> #include <asm/inst.h> -#include <asm/set_memory.h> #include <linux/uaccess.h> DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; @@ -105,42 +105,25 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } -static bool arch_kprobe_on_func_entry(unsigned long offset) +static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset) { -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif
-#else + unsigned long ip = ftrace_location(addr); + + if (ip) + return offset <= (ip - addr); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return offset <= 8; return !offset; -#endif } /* XXX try and fold the magic of kprobe_lookup_name() in this */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - *on_func_entry = arch_kprobe_on_func_entry(offset); + *on_func_entry = arch_kprobe_on_func_entry(addr, offset); return (kprobe_opcode_t *)(addr + offset); } -void *alloc_insn_page(void) -{ - void *page; - - page = module_alloc(PAGE_SIZE); - if (!page) - return NULL; - - if (strict_module_rwx_enabled()) { - set_memory_ro((unsigned long)page, 1); - set_memory_x((unsigned long)page, 1); - } - return page; -} - int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; @@ -158,10 +141,14 @@ int arch_prepare_kprobe(struct kprobe *p) printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } - preempt_disable(); prev = get_kprobe(p->addr - 1); - preempt_enable_no_resched(); - if (prev && ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { + + /* + * When prev is a ftrace-based kprobe, we don't have an insn, and it + * doesn't probe for prefixed instruction. + */ + if (prev && !kprobe_ftrace(prev) && + ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) { printk("Cannot register a kprobe on the second word of prefixed instruction\n"); ret = -EINVAL; } @@ -239,16 +226,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->link; - ri->fp = NULL; - - /* Replace the return addr with trampoline addr */ - regs->link = (unsigned long)__kretprobe_trampoline; -} -NOKPROBE_SYMBOL(arch_prepare_kretprobe); - static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; @@ -269,7 +246,7 @@ static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) * So, we should never get here... but, its still * good to catch them, just in case... 
*/ - printk("Can't step on instruction %s\n", ppc_inst_as_str(insn)); + printk("Can't step on instruction %08lx\n", ppc_inst_as_ulong(insn)); BUG(); } else { /* @@ -365,7 +342,7 @@ int kprobe_handler(struct pt_regs *regs) if (ret > 0) { restore_previous_kprobe(kcb); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -378,7 +355,7 @@ int kprobe_handler(struct pt_regs *regs) if (p->pre_handler && p->pre_handler(p, regs)) { /* handler changed execution path, so skip ss setup */ reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } @@ -391,7 +368,7 @@ int kprobe_handler(struct pt_regs *regs) kcb->kprobe_status = KPROBE_HIT_SSDONE; reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); return 1; } } @@ -400,55 +377,12 @@ int kprobe_handler(struct pt_regs *regs) return 1; no_kprobe: - preempt_enable_no_resched(); + preempt_enable(); return ret; } NOKPROBE_SYMBOL(kprobe_handler); /* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -asm(".global __kretprobe_trampoline\n" - ".type __kretprobe_trampoline, @function\n" - "__kretprobe_trampoline:\n" - "nop\n" - "blr\n" - ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"); - -/* - * Called when the probe at kretprobe trampoline is hit - */ -static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) -{ - unsigned long orig_ret_address; - - orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); - /* - * We get here through one of two paths: - * 1. by taking a trap -> kprobe_handler() -> here - * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here - * - * When going back through (1), we need regs->nip to be setup properly - * as it is used to determine the return address from the trap. - * For (2), since nip is not honoured with optprobes, we instead setup - * the link register properly so that the subsequent 'blr' in - * __kretprobe_trampoline jumps back to the right instruction. - * - * For nip, we should set the address to the previous instruction since - * we end up emulating it in kprobe_handler(), which increments the nip - * again. - */ - regs_set_return_ip(regs, orig_ret_address - 4); - regs->link = orig_ret_address; - - return 0; -} -NOKPROBE_SYMBOL(trampoline_probe_handler); - -/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" * instruction. 
To avoid the SMP problems that can occur when we @@ -486,7 +420,7 @@ int kprobe_post_handler(struct pt_regs *regs) } reset_current_kprobe(); out: - preempt_enable_no_resched(); + preempt_enable(); /* * if somebody else is singlestepping across a probe point, msr @@ -525,7 +459,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - preempt_enable_no_resched(); + preempt_enable(); break; case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: @@ -550,19 +484,9 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) } NOKPROBE_SYMBOL(kprobe_fault_handler); -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - -int __init arch_init_kprobes(void) -{ - return register_kprobe(&trampoline_p); -} - int arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return 1; return 0; diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 6568823cf306..7209d00a9c25 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -455,7 +455,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); break; -#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_PPC_E500 case KVM_INST_MFSPR(SPRN_MAS0): if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt); @@ -484,7 +484,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt); break; -#endif /* CONFIG_PPC_BOOK3E_MMU */ +#endif /* CONFIG_PPC_E500 */ case KVM_INST_MFSPR(SPRN_SPRG4): #ifdef CONFIG_BOOKE @@ -557,7 +557,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) case KVM_INST_MTSPR(SPRN_DSISR): kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); break; -#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_PPC_E500 case KVM_INST_MTSPR(SPRN_MAS0): if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt); @@ -586,7 +586,7 @@ static void __init kvm_check_ins(u32 *inst, u32 features) if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt); break; -#endif /* CONFIG_PPC_BOOK3E_MMU */ +#endif /* CONFIG_PPC_E500 */ case KVM_INST_MTSPR(SPRN_SPRG4): if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7) @@ -632,19 +632,19 @@ static void __init kvm_check_ins(u32 *inst, u32 features) #endif } - switch (inst_no_rt & ~KVM_MASK_RB) { #ifdef CONFIG_PPC_BOOK3S_32 + switch (inst_no_rt & ~KVM_MASK_RB) { case KVM_INST_MTSRIN: if (features & KVM_MAGIC_FEAT_SR) { u32 inst_rb = _inst & KVM_MASK_RB; kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); } break; -#endif } +#endif - switch (_inst) { #ifdef CONFIG_BOOKE + switch (_inst) { case KVM_INST_WRTEEI_0: kvm_patch_ins_wrteei_0(inst); break; @@ -652,8 +652,8 @@ static void __init kvm_check_ins(u32 *inst, u32 features) case KVM_INST_WRTEEI_1: kvm_patch_ins_wrtee(inst, 0, 1); break; -#endif } +#endif } extern u32 kvm_template_start[]; diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 5c58460b269a..ae1906bfe8a5 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -5,8 +5,8 @@ #include <linux/serial_core.h> #include <linux/console.h> #include <linux/pci.h> +#include <linux/of.h> #include <linux/of_address.h> 
-#include <linux/of_device.h> #include <linux/of_irq.h> #include <linux/serial_reg.h> #include <asm/io.h> @@ -54,9 +54,10 @@ static int legacy_serial_console = -1; static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_PORT; -static unsigned int tsi_serial_in(struct uart_port *p, int offset) +static u32 tsi_serial_in(struct uart_port *p, unsigned int offset) { - unsigned int tmp; + u32 tmp; + offset = offset << p->regshift; if (offset == UART_IIR) { tmp = readl(p->membase + (UART_IIR & ~3)); @@ -65,7 +66,7 @@ static unsigned int tsi_serial_in(struct uart_port *p, int offset) return readb(p->membase + offset); } -static void tsi_serial_out(struct uart_port *p, int offset, int value) +static void tsi_serial_out(struct uart_port *p, unsigned int offset, u32 value) { offset = offset << p->regshift; if (!((offset == UART_IER) && (value & UART_IER_UUE))) @@ -77,6 +78,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { + struct plat_serial8250_port *legacy_port; + struct legacy_serial_info *legacy_info; const __be32 *clk, *spd, *rs; u32 clock = BASE_BAUD * 16; u32 shift = 0; @@ -110,16 +113,17 @@ static int __init add_legacy_port(struct device_node *np, int want_index, if (index >= legacy_serial_count) legacy_serial_count = index + 1; + legacy_port = &legacy_serial_ports[index]; + legacy_info = &legacy_serial_infos[index]; + /* Check if there is a port who already claimed our slot */ - if (legacy_serial_infos[index].np != NULL) { + if (legacy_info->np != NULL) { /* if we still have some room, move it, else override */ if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) { printk(KERN_DEBUG "Moved legacy port %d -> %d\n", index, legacy_serial_count); - legacy_serial_ports[legacy_serial_count] = - legacy_serial_ports[index]; - legacy_serial_infos[legacy_serial_count] = - legacy_serial_infos[index]; + legacy_serial_ports[legacy_serial_count] = *legacy_port; + legacy_serial_infos[legacy_serial_count] = *legacy_info; legacy_serial_count++; } else { printk(KERN_DEBUG "Replacing legacy port %d\n", index); @@ -127,36 +131,32 @@ static int __init add_legacy_port(struct device_node *np, int want_index, } /* Now fill the entry */ - memset(&legacy_serial_ports[index], 0, - sizeof(struct plat_serial8250_port)); + memset(legacy_port, 0, sizeof(*legacy_port)); if (iotype == UPIO_PORT) - legacy_serial_ports[index].iobase = base; + legacy_port->iobase = base; else - legacy_serial_ports[index].mapbase = base; - - legacy_serial_ports[index].iotype = iotype; - legacy_serial_ports[index].uartclk = clock; - legacy_serial_ports[index].irq = irq; - legacy_serial_ports[index].flags = flags; - legacy_serial_ports[index].regshift = shift; - legacy_serial_infos[index].taddr = taddr; - legacy_serial_infos[index].np = of_node_get(np); - legacy_serial_infos[index].clock = clock; - legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0; - legacy_serial_infos[index].irq_check_parent = irq_check_parent; + legacy_port->mapbase = base; + + legacy_port->iotype = iotype; + legacy_port->uartclk = clock; + legacy_port->irq = irq; + legacy_port->flags = flags; + legacy_port->regshift = shift; + legacy_info->taddr = taddr; + legacy_info->np = of_node_get(np); + legacy_info->clock = clock; + legacy_info->speed = spd ? 
be32_to_cpup(spd) : 0; + legacy_info->irq_check_parent = irq_check_parent; if (iotype == UPIO_TSI) { - legacy_serial_ports[index].serial_in = tsi_serial_in; - legacy_serial_ports[index].serial_out = tsi_serial_out; + legacy_port->serial_in = tsi_serial_in; + legacy_port->serial_out = tsi_serial_out; } - printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n", - index, np); - printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n", + printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n", index, np); + printk(KERN_DEBUG " %s=%pa, taddr=%pa, irq=%lx, clk=%d, speed=%d\n", (iotype == UPIO_PORT) ? "port" : "mem", - (unsigned long long)base, (unsigned long long)taddr, irq, - legacy_serial_ports[index].uartclk, - legacy_serial_infos[index].speed); + &base, &taddr, irq, legacy_port->uartclk, legacy_info->speed); return index; } @@ -171,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np, /* We only support ports that have a clock frequency properly * encoded in the device-tree. */ - if (of_get_property(np, "clock-frequency", NULL) == NULL) + if (!of_property_present(np, "clock-frequency")) return -1; /* if reg-offset don't try to use it */ - if ((of_get_property(np, "reg-offset", NULL) != NULL)) + if (of_property_present(np, "reg-offset")) return -1; /* if rtas uses this device, don't try to use it as well */ - if (of_get_property(np, "used-by-rtas", NULL) != NULL) + if (of_property_read_bool(np, "used-by-rtas")) return -1; /* Get the address */ @@ -237,7 +237,7 @@ static int __init add_legacy_isa_port(struct device_node *np, * Note: Don't even try on P8 lpc, we know it's not directly mapped */ if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || - of_get_property(isa_brg, "ranges", NULL)) { + of_property_present(isa_brg, "ranges")) { taddr = of_translate_address(np, reg); if (taddr == OF_BAD_ADDR) taddr = 0; @@ -268,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np, * compatible UARTs on PCI need all sort of quirks (port offsets * etc...) that this code doesn't know about */ - if (of_get_property(np, "clock-frequency", NULL) == NULL) + if (!of_property_present(np, "clock-frequency")) return -1; /* Get the PCI address. 
Assume BAR 0 */ @@ -471,6 +471,8 @@ void __init find_legacy_serial_ports(void) } #endif + of_node_put(stdout); + DBG("legacy_serial_console = %d\n", legacy_serial_console); if (legacy_serial_console >= 0) setup_legacy_serial_console(legacy_serial_console); @@ -506,12 +508,16 @@ static void __init fixup_port_irq(int index, port->irq = virq; -#ifdef CONFIG_SERIAL_8250_FSL - if (of_device_is_compatible(np, "fsl,ns16550")) { - port->handle_irq = fsl8250_handle_irq; - port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); + if (IS_ENABLED(CONFIG_SERIAL_8250) && + of_device_is_compatible(np, "fsl,ns16550")) { + if (IS_REACHABLE(CONFIG_SERIAL_8250_FSL)) { + port->handle_irq = fsl8250_handle_irq; + port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE); + } else { + pr_warn_once("Not activating Freescale specific workaround for device %pOFP\n", + np); + } } -#endif } static void __init fixup_port_pio(int index, diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 18173199b79d..219f28637a3e 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -131,6 +131,13 @@ void save_mce_event(struct pt_regs *regs, long handled, if (mce->error_type == MCE_ERROR_TYPE_UE) mce->u.ue_error.ignore_event = mce_err->ignore_event; + /* + * Raise irq work so that we don't miss logging the error for + * unrecoverable errors. + */ + if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED) + mce_irq_work_queue(); + if (!addr) return; @@ -233,9 +240,6 @@ static void machine_check_ue_event(struct machine_check_event *evt) } memcpy(&local_paca->mce_info->mce_ue_event_queue[index], evt, sizeof(*evt)); - - /* Queue work to process this event later. */ - mce_irq_work_queue(); } /* @@ -756,7 +760,7 @@ void __init mce_init(void) mce_info = memblock_alloc_try_nid(sizeof(*mce_info), __alignof__(*mce_info), MEMBLOCK_LOW_LIMIT, - limit, cpu_to_node(i)); + limit, early_cpu_to_node(i)); if (!mce_info) goto err; paca_ptrs[i]->mce_info = mce_info; diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index fb7de3543c03..29e1440d14cc 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -10,11 +10,11 @@ * * setjmp/longjmp code by Paul Mackerras. */ +#include <linux/export.h> #include <asm/ppc_asm.h> #include <asm/unistd.h> #include <asm/asm-compat.h> #include <asm/asm-offsets.h> -#include <asm/export.h> .text diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index e5127b19fec2..acb727f54e9d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -8,6 +8,7 @@ * */ +#include <linux/export.h> #include <linux/sys.h> #include <asm/unistd.h> #include <asm/errno.h> @@ -22,38 +23,11 @@ #include <asm/processor.h> #include <asm/bug.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/feature-fixups.h> .text /* - * This returns the high 64 bits of the product of two 64-bit numbers. - */ -_GLOBAL(mulhdu) - cmpwi r6,0 - cmpwi cr1,r3,0 - mr r10,r4 - mulhwu r4,r4,r5 - beq 1f - mulhwu r0,r10,r6 - mullw r7,r10,r5 - addc r7,r0,r7 - addze r4,r4 -1: beqlr cr1 /* all done if high part of A is 0 */ - mullw r9,r3,r5 - mulhwu r10,r3,r5 - beq 2f - mullw r0,r3,r6 - mulhwu r8,r3,r6 - addc r7,r0,r7 - adde r4,r4,r8 - addze r10,r10 -2: addc r4,r4,r9 - addze r3,r10 - blr - -/* * reloc_got2 runs through the .got2 section adding an offset * to each entry.
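* For example (illustrative): a PPC32 kernel built -fPIC but entered at an address other than its link address passes the link-to-run delta in, and every 32-bit .got2 slot gets that delta added so pointer constants stay usable before relocation is complete.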
*/ @@ -176,46 +150,6 @@ _GLOBAL(low_choose_7447a_dfs) #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */ -#ifdef CONFIG_40x - -/* - * Do an IO access in real mode - */ -_GLOBAL(real_readb) - mfmsr r7 - rlwinm r0,r7,0,~MSR_DR - sync - mtmsr r0 - sync - isync - lbz r3,0(r3) - sync - mtmsr r7 - sync - isync - blr -_ASM_NOKPROBE_SYMBOL(real_readb) - - /* - * Do an IO access in real mode - */ -_GLOBAL(real_writeb) - mfmsr r7 - rlwinm r0,r7,0,~MSR_DR - sync - mtmsr r0 - sync - isync - stb r3,0(r4) - sync - mtmsr r7 - sync - isync - blr -_ASM_NOKPROBE_SYMBOL(real_writeb) - -#endif /* CONFIG_40x */ - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of @@ -382,7 +316,7 @@ EXPORT_SYMBOL(__bswapdi2) _GLOBAL(start_secondary_resume) /* Reset stack */ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT - addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE li r3,0 stw r3,0(r1) /* Zero the stack frame pointer */ bl start_secondary diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index fd6d8d3a548e..a997c7f43dc0 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,8 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ +#include <linux/export.h> +#include <linux/linkage.h> #include <linux/sys.h> #include <asm/unistd.h> #include <asm/errno.h> @@ -22,7 +24,6 @@ #include <asm/kexec.h> #include <asm/ptrace.h> #include <asm/mmu.h> -#include <asm/export.h> #include <asm/feature-fixups.h> .text @@ -73,7 +74,7 @@ _GLOBAL(rmci_off) blr #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_PMAC /* * Do an IO access in real mode @@ -136,7 +137,7 @@ _GLOBAL(real_writeb) sync isync blr -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ +#endif // CONFIG_PPC_PMAC #ifdef CONFIG_PPC_PASEMI @@ -173,7 +174,7 @@ _GLOBAL(real_205_writeb) #endif /* CONFIG_PPC_PASEMI */ -#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE) +#ifdef CONFIG_CPU_FREQ_PMAC64 /* * SCOM access functions for 970 (FX only for now) * @@ -191,7 +192,7 @@ _GLOBAL(scom970_read) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd, * and finally or in RW bit */ @@ -225,7 +226,7 @@ _GLOBAL(scom970_write) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd. */ @@ -242,7 +243,7 @@ _GLOBAL(scom970_write) /* restore interrupts */ mtmsrd r5,1 blr -#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ +#endif // CONFIG_CPU_FREQ_PMAC64 /* kexec_wait(phys_cpu) * @@ -286,7 +287,7 @@ kexec_flag: #ifdef CONFIG_KEXEC_CORE -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* * BOOK3E has no real MMU mode, so we have to setup the initial TLB * for a core to identity map v:0 to p:0. This current implementation @@ -353,8 +354,8 @@ _GLOBAL(kexec_smp_wait) * * don't overwrite r3 here, it is live for kexec_wait above. */ -real_mode: /* assume normal blr return */ -#ifdef CONFIG_PPC_BOOK3E +SYM_FUNC_START_LOCAL(real_mode) /* assume normal blr return */ +#ifdef CONFIG_PPC_BOOK3E_64 /* Create an identity mapping. 
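* Book3E cannot simply switch the MMU off, so kexec branches to kexec_create_tlb below to install a 1:1 (v:0 -> p:0) mapping before control is handed to the new kernel.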
*/ b kexec_create_tlb #else @@ -370,6 +371,7 @@ real_mode: /* assume normal blr return */ mtspr SPRN_SRR0,r11 rfid #endif +SYM_FUNC_END(real_mode) /* * kexec_sequence(newstack, start, image, control, clear_all(), @@ -384,7 +386,7 @@ _GLOBAL(kexec_sequence) std r0,16(r1) /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3) mr r1,r3 li r0,0 @@ -401,7 +403,7 @@ _GLOBAL(kexec_sequence) std r26,-48(r1) std r25,-56(r1) - stdu r1,-STACK_FRAME_OVERHEAD-64(r1) + stdu r1,-STACK_FRAME_MIN_SIZE-64(r1) /* save args into preserved regs */ mr r31,r3 /* newstack (both) */ @@ -413,7 +415,7 @@ _GLOBAL(kexec_sequence) lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */ /* disable interrupts, we are overwriting kernel data next */ -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 wrteei 0 #else mfmsr r3 @@ -430,7 +432,7 @@ _GLOBAL(kexec_sequence) 1: /* copy dest pages, flush whole dest image */ mr r3,r29 - bl kexec_copy_flush /* (image) */ + bl CFUNC(kexec_copy_flush) /* (image) */ /* turn off mmu now if not done earlier */ cmpdi r26,0 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index f6d6ae0a1692..baeb24c102c8 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -7,7 +7,6 @@ #include <linux/elf.h> #include <linux/moduleloader.h> #include <linux/err.h> -#include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/bug.h> #include <asm/module.h> @@ -17,8 +16,6 @@ #include <asm/setup.h> #include <asm/sections.h> -static LIST_HEAD(module_bug_list); - static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) @@ -88,40 +85,3 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } - -static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) -{ - pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; - gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); - - /* - * Don't do huge page allocations for modules yet until more testing - * is done. STRICT_MODULE_RWX may require extra work to support this - * too. - */ - return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - -void *module_alloc(unsigned long size) -{ -#ifdef MODULES_VADDR - unsigned long limit = (unsigned long)_etext - SZ_32M; - void *ptr = NULL; - - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - - /* First try within 32M limit from _etext to avoid branch trampolines */ - if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END, true); - - if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); - - return ptr; -#else - return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); -#endif -} diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index ea6536171778..f930e3395a7f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,7 +18,7 @@ #include <linux/bug.h> #include <linux/sort.h> #include <asm/setup.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> /* Count how many different relocations (different symbol, different addend) */ @@ -163,8 +163,7 @@ static uint32_t do_plt_call(void *location, pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? 
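* (A module has two PLT sections; within_module_core() below selects the core PLT for call sites in core text, while init-text call sites use the init PLT.)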
*/ - if (location >= mod->core_layout.base - && location < mod->core_layout.base + mod->core_layout.size) + if (within_module_core((unsigned long)location, mod)) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; @@ -322,14 +321,14 @@ notrace int module_trampoline_target(struct module *mod, unsigned long addr, int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) { - module->arch.tramp = do_plt_call(module->core_layout.base, + module->arch.tramp = do_plt_call(module->mem[MOD_TEXT].base, (unsigned long)ftrace_caller, sechdrs, module); if (!module->arch.tramp) return -ENOENT; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - module->arch.tramp_regs = do_plt_call(module->core_layout.base, + module->arch.tramp_regs = do_plt_call(module->mem[MOD_TEXT].base, (unsigned long)ftrace_regs_caller, sechdrs, module); if (!module->arch.tramp_regs) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 7e45dc98df8a..126bf3b06ab7 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <asm/module.h> #include <asm/firmware.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> @@ -31,6 +31,16 @@ this, and makes other things simpler. Anton? --RR. */ +bool module_elf_check_arch(Elf_Ehdr *hdr) +{ + unsigned long abi_level = hdr->e_flags & 0x3; + + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) + return abi_level == 2; + else + return abi_level < 2; +} + #ifdef CONFIG_PPC64_ELF_ABI_V2 static func_desc_t func_desc(unsigned long addr) @@ -91,32 +101,45 @@ static unsigned long stub_func_addr(func_desc_t func) /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ -struct ppc64_stub_entry -{ - /* 28 byte jump instruction sequence (7 instructions). We only - * need 6 instructions on ABIv2 but we always allocate 7 so - * so we don't have to modify the trampoline load instruction. */ +struct ppc64_stub_entry { + /* + * 28 byte jump instruction sequence (7 instructions) that can + * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned + * with PCREL kernels that use prefix instructions in the stub. + */ u32 jump[7]; /* Used by ftrace to identify stubs */ u32 magic; /* Data for the above code */ func_desc_t funcdata; +} __aligned(8); + +struct ppc64_got_entry { + u64 addr; }; /* * PPC64 uses 24 bit jumps, but we need to jump into other modules or * the kernel which may be further. So we jump to a stub. * - * For ELFv1 we need to use this to set up the new r2 value (aka TOC - * pointer). For ELFv2 it's the callee's responsibility to set up the - * new r2, but for both we need to save the old r2. + * Target address and TOC are loaded from function descriptor in the + * ppc64_stub_entry. + * + * r12 is used to generate the target address, which is required for the + * ELFv2 global entry point calling convention. * - * We could simply patch the new r2 value and function pointer into - * the stub, but it's significantly shorter to put these values at the - * end of the stub code, and patch the stub address (32-bits relative - * to the TOC ptr, r2) into the stub. + * TOC handling: + * - PCREL does not have a TOC. 
+ * - ELFv2 non-PCREL just has to save r2, the callee is responsible for + * setting its own TOC pointer at the global entry address. + * - ELFv1 must load the new TOC pointer from the function descriptor. */ static u32 ppc64_stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL + /* pld r12,addr */ + PPC_PREFIX_8LS | __PPC_PRFX_R(1), + PPC_INST_PLD | ___PPC_RT(_R12), +#else PPC_RAW_ADDIS(_R11, _R2, 0), PPC_RAW_ADDI(_R11, _R11, 0), /* Save current r2 value in magic place on the stack. */ @@ -126,13 +149,17 @@ static u32 ppc64_stub_insns[] = { /* Set up new r2 from function descriptor */ PPC_RAW_LD(_R2, _R11, 40), #endif +#endif PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR(), }; -/* Count how many different 24-bit relocations (different symbol, - different addend) */ -static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) +/* + * Count how many different r_type relocations (different symbol, + * different addend). + */ +static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num, + unsigned long r_type) { unsigned int i, r_info, r_addend, _count_relocs; @@ -141,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) r_info = 0; r_addend = 0; for (i = 0; i < num; i++) - /* Only count 24-bit relocs, others don't need stubs */ - if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 && + /* Only count r_type relocs, others don't need stubs */ + if (ELF64_R_TYPE(rela[i].r_info) == r_type && (r_info != ELF64_R_SYM(rela[i].r_info) || r_addend != rela[i].r_addend)) { _count_relocs++; @@ -178,7 +205,9 @@ static int relacmp(const void *_x, const void *_y) /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs) + const Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) { /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; @@ -203,23 +232,127 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, relocs += count_relocs((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size - / sizeof(Elf64_Rela)); + / sizeof(Elf64_Rela), + R_PPC_REL24); +#ifdef CONFIG_PPC_KERNEL_PCREL + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + R_PPC64_REL24_NOTOC); +#endif } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /* an additional one for ftrace_regs_caller */ - relocs++; -#endif + /* stubs for ftrace_caller and ftrace_regs_caller */ + relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* stubs for the function tracer */ + for (i = 1; i < hdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) { + me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long); + me->arch.ool_stub_index = 0; + relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / + sizeof(struct ppc64_stub_entry); + break; + } + } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } +#ifdef CONFIG_PPC_KERNEL_PCREL +static int count_pcpu_relocs(const Elf64_Shdr *sechdrs, + const Elf64_Rela *rela, unsigned int num, + unsigned int symindex, unsigned int pcpu) +{ + unsigned int i, r_info, r_addend, _count_relocs; + + _count_relocs = 0; + r_info = 0; + r_addend = 0; + + for (i = 0; i < num; i++) { + Elf64_Sym 
*sym; + + /* This is the symbol it is referring to */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info); + + if (sym->st_shndx == pcpu && + (r_info != ELF64_R_SYM(rela[i].r_info) || + r_addend != rela[i].r_addend)) { + _count_relocs++; + r_info = ELF64_R_SYM(rela[i].r_info); + r_addend = rela[i].r_addend; + } + } + + return _count_relocs; +} + +/* Get size of potential GOT required. */ +static unsigned long get_got_size(const Elf64_Ehdr *hdr, + const Elf64_Shdr *sechdrs, + struct module *me) +{ + /* One extra reloc so it's always 0-addr terminated */ + unsigned long relocs = 1; + unsigned int i, symindex = 0; + + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_SYMTAB) { + symindex = i; + break; + } + } + WARN_ON_ONCE(!symindex); + + /* Every relocated section... */ + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_RELA) { + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %llu\n", (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela)); + + /* + * Sort the relocation information based on a symbol and + * addend key. This is a stable O(n*log n) complexity + * algorithm but it will reduce the complexity of + * count_relocs() to linear complexity O(n) + */ + sort((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela), + sizeof(Elf64_Rela), relacmp, NULL); + + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + R_PPC64_GOT_PCREL34); + + /* + * Percpu data access typically gets linked with + * REL34 relocations, but the percpu section gets + * moved at load time and requires that to be + * converted to GOT linkage. + */ + if (IS_ENABLED(CONFIG_SMP) && symindex) + relocs += count_pcpu_relocs(sechdrs, + (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + symindex, me->arch.pcpu_section); + } + } + + pr_debug("Looks like a total of %lu GOT entries, max\n", relocs); + return relocs * sizeof(struct ppc64_got_entry); +} +#else /* CONFIG_PPC_KERNEL_PCREL */ + /* Still needed for ELFv2, for .TOC. */ static void dedotify_versions(struct modversion_info *vers, unsigned long size) @@ -232,6 +365,24 @@ static void dedotify_versions(struct modversion_info *vers, } } +/* Same as normal versions, remove a leading dot if present. */ +static void dedotify_ext_version_names(char *str_seq, unsigned long size) +{ + unsigned long out = 0; + unsigned long in; + char last = '\0'; + + for (in = 0; in < size; in++) { + /* Skip one leading dot */ + if (last == '\0' && str_seq[in] == '.') + in++; + last = str_seq[in]; + str_seq[out++] = last; + } + /* Zero the trailing portion of the names table for robustness */ + memset(&str_seq[out], 0, size - out); +} + /* * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC. * seem to be defined (value set later). 
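* For example (illustrative): an ELFv1 object that calls ".memcpy" has the dot stripped in place so the loader resolves plain "memcpy", and the fabricated ".TOC." symbol later gets its value from my_r2().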
@@ -269,6 +420,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, } return NULL; } +#endif /* CONFIG_PPC_KERNEL_PCREL */ bool module_init_section(const char *name) { @@ -287,20 +439,32 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, for (i = 1; i < hdr->e_shnum; i++) { if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; +#ifdef CONFIG_PPC_KERNEL_PCREL + else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0) + me->arch.pcpu_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) { + me->arch.got_section = i; + if (sechdrs[i].sh_addralign < 8) + sechdrs[i].sh_addralign = 8; + } +#else else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { me->arch.toc_section = i; if (sechdrs[i].sh_addralign < 8) sechdrs[i].sh_addralign = 8; - } - else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + } else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); + else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0) + dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size / sizeof(Elf64_Sym), (void *)hdr + sechdrs[sechdrs[i].sh_link].sh_offset); +#endif } if (!me->arch.stubs_section) { @@ -308,26 +472,47 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return -ENOEXEC; } +#ifdef CONFIG_PPC_KERNEL_PCREL + if (!me->arch.got_section) { + pr_err("%s: doesn't contain .mygot.\n", me->name); + return -ENOEXEC; + } + + /* Override the got size */ + sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me); +#else /* If we don't have a .toc, just use .stubs. We need to set r2 to some reasonable value in case the module calls out to other functions via a stub, or if a function pointer escapes the module by some means. 
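* Either way my_r2() stays well defined: it returns the chosen section's address rounded down to 256 bytes plus the conventional 0x8000 TOC bias.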
*/ if (!me->arch.toc_section) me->arch.toc_section = me->arch.stubs_section; +#endif /* Override the stubs size */ - sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me); + return 0; } -#ifdef CONFIG_MPROFILE_KERNEL +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) static u32 stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)), + PPC_RAW_NOP(), /* align the prefix insn */ + /* paddi r12,r12,addr */ + PPC_PREFIX_MLS | __PPC_PRFX_R(0), + PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), +#else PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), PPC_RAW_ADDIS(_R12, _R12, 0), PPC_RAW_ADDI(_R12, _R12, 0), PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR(), +#endif }; /* @@ -348,18 +533,37 @@ static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, { long reladdr; - memcpy(entry->jump, stub_insns, sizeof(stub_insns)); - - /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = addr - kernel_toc_addr(); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of %ps out of range of kernel_toc.\n", - me->name, (void *)addr); + if ((unsigned long)entry->jump % 8 != 0) { + pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name); return 0; } - entry->jump[1] |= PPC_HA(reladdr); - entry->jump[2] |= PPC_LO(reladdr); + BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump)); + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + /* Stub uses address relative to kernel base (from the paca) */ + reladdr = addr - local_paca->kernelbase; + if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) { + pr_err("%s: Address of %ps out of range of 34-bit relative address.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[2] |= IMM_H18(reladdr); + entry->jump[3] |= IMM_L(reladdr); + } else { + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + } /* Even though we don't use funcdata in the stub, it's needed elsewhere. */ entry->funcdata = func_desc(addr); @@ -405,7 +609,11 @@ static bool is_mprofile_ftrace_call(const char *name) */ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { +#ifndef CONFIG_PPC_KERNEL_PCREL return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000; +#else + return -1; +#endif } /* Patch stub to reference function and correct r2 value. */ @@ -422,38 +630,62 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); + if ((unsigned long)entry->jump % 8 != 0) { + pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name); + return 0; + } + + BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump)); for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { if (patch_instruction(&entry->jump[i], ppc_inst(ppc64_stub_insns[i]))) return 0; } - /* Stub uses address relative to r2. 
*/ - reladdr = (unsigned long)entry - my_r2(sechdrs, me); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address %p of stub out of range of %p.\n", - me->name, (void *)reladdr, (void *)my_r2); - return 0; - } - pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + /* Stub uses address relative to itself! */ + reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata); + BUILD_BUG_ON(reladdr != 32); + if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) { + pr_err("%s: Address of %p out of range of 34-bit relative address.\n", + me->name, (void *)reladdr); + return 0; + } + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); - if (patch_instruction(&entry->jump[0], - ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) - return 0; + /* May not even need this if we're relative to 0 */ + if (patch_instruction(&entry->jump[0], + ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr), + entry->jump[1] | IMM_L(reladdr)))) + return 0; - if (patch_instruction(&entry->jump[1], - ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) - return 0; + } else { + /* Stub uses address relative to r2. */ + reladdr = (unsigned long)entry - my_r2(sechdrs, me); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address %p of stub out of range of %p.\n", + me->name, (void *)reladdr, (void *)my_r2); + return 0; + } + pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); + + if (patch_instruction(&entry->jump[0], + ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) + return 0; + + if (patch_instruction(&entry->jump[1], + ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) + return 0; + } // func_desc_t is 8 bytes if ABIv2, else 16 bytes desc = func_desc(addr); for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { - if (patch_instruction(((u32 *)&entry->funcdata) + i, - ppc_inst(((u32 *)(&desc))[i]))) + if (patch_u32(((u32 *)&entry->funcdata) + i, ((u32 *)&desc)[i])) return 0; } - if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + if (patch_u32(&entry->magic, STUB_MAGIC)) return 0; return 1; @@ -487,14 +719,49 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs, return (unsigned long)&stubs[i]; } +#ifdef CONFIG_PPC_KERNEL_PCREL +/* Create GOT to load the location described in this ptr */ +static unsigned long got_for_addr(const Elf64_Shdr *sechdrs, + unsigned long addr, + struct module *me, + const char *name) +{ + struct ppc64_got_entry *got; + unsigned int i, num_got; + + if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return addr; + + num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got); + + /* Find this stub, or if that fails, the next avail. entry */ + got = (void *)sechdrs[me->arch.got_section].sh_addr; + for (i = 0; got[i].addr; i++) { + if (WARN_ON(i >= num_got)) + return 0; + + if (got[i].addr == addr) + return (unsigned long)&got[i]; + } + + got[i].addr = addr; + + return (unsigned long)&got[i]; +} +#endif + /* We expect a noop next: if it is, replace it with instruction to restore r2. */ static int restore_r2(const char *name, u32 *instruction, struct module *me) { u32 *prev_insn = instruction - 1; + u32 insn_val = *instruction; + + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return 0; if (is_mprofile_ftrace_call(name)) - return 1; + return 0; /* * Make sure the branch isn't a sibling call. Sibling calls aren't @@ -502,19 +769,25 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) * restore afterwards. 
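* A stubbed call clobbers r2, so for a normal (non-sibling) call the nop in the following instruction slot is rewritten to "ld r2,R2_STACK_OFFSET(r1)" (PPC_INST_LD_TOC) to reload the TOC pointer after the call returns.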
*/ if (!instr_is_relative_link_branch(ppc_inst(*prev_insn))) - return 1; + return 0; - if (*instruction != PPC_RAW_NOP()) { - pr_err("%s: Expected nop after call, got %08x at %pS\n", - me->name, *instruction, instruction); + /* + * For livepatch, the restore r2 instruction might have already been + * written previously, if the referenced symbol is in a previously + * unloaded module which is now being loaded again. In that case, skip + * the warning and the instruction write. + */ + if (insn_val == PPC_INST_LD_TOC) return 0; + + if (insn_val != PPC_RAW_NOP()) { + pr_err("%s: Expected nop after call, got %08x at %pS\n", + me->name, insn_val, instruction); + return -ENOEXEC; } /* ld r2,R2_STACK_OFFSET(r1) */ - if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) - return 0; - - return 1; + return patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC)); } int apply_relocate_add(Elf64_Shdr *sechdrs, @@ -532,6 +805,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, pr_debug("Applying ADD relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); +#ifndef CONFIG_PPC_KERNEL_PCREL /* First time we're called, we can fix up .TOC. */ if (!me->arch.toc_fixed) { sym = find_dot_toc(sechdrs, strtab, symindex); @@ -541,7 +815,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, sym->st_value = my_r2(sechdrs, me); me->arch.toc_fixed = true; } - +#endif for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -569,6 +843,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *(unsigned long *)location = value; break; +#ifndef CONFIG_PPC_KERNEL_PCREL case R_PPC64_TOC: *(unsigned long *)location = my_r2(sechdrs, me); break; @@ -628,8 +903,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, = (*((uint16_t *) location) & ~0xffff) | (value & 0xffff); break; +#endif case R_PPC_REL24: +#ifdef CONFIG_PPC_KERNEL_PCREL + /* PCREL still generates REL24 for mcount */ + case R_PPC64_REL24_NOTOC: +#endif /* FIXME: Handle weak symbols here --RR */ if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_LIVEPATCH) { @@ -638,8 +918,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, strtab + sym->st_name); if (!value) return -ENOENT; - if (!restore_r2(strtab + sym->st_name, - (u32 *)location + 1, me)) + if (restore_r2(strtab + sym->st_name, + (u32 *)location + 1, me)) return -ENOEXEC; } else value += local_entry_offset(sym); @@ -677,6 +957,47 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, *(u32 *)location = value; break; +#ifdef CONFIG_PPC_KERNEL_PCREL + case R_PPC64_PCREL34: { + unsigned long absvalue = value; + + /* Convert value to relative */ + value -= (unsigned long)location; + + if (value + 0x200000000 > 0x3ffffffff) { + if (sym->st_shndx != me->arch.pcpu_section) { + pr_err("%s: REL34 %li out of range!\n", + me->name, (long)value); + return -ENOEXEC; + } + + /* + * per-cpu section is special cased because + * it is moved during loading, so has to be + * converted to use GOT. 
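+ * Illustrative effect: a prefixed "pla" that computed a per-cpu + * address directly is rewritten below into a "pld" that fetches the + * final address from a GOT slot filled in at module load time.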
+ */ + value = got_for_addr(sechdrs, absvalue, me, + strtab + sym->st_name); + if (!value) + return -ENOENT; + value -= (unsigned long)location; + + /* Turn pla into pld */ + if (patch_instruction((u32 *)location, + ppc_inst_prefix((*(u32 *)location & ~0x02000000), + (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000))) + return -EFAULT; + } + + if (patch_instruction((u32 *)location, + ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value), + (*((u32 *)location + 1) & ~0xffff) | IMM_L(value)))) + return -EFAULT; + + break; + } + +#else case R_PPC64_TOCSAVE: /* * Marker reloc indicates we don't have to save r2. @@ -684,8 +1005,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, * it. */ break; +#endif case R_PPC64_ENTRY: + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + break; + /* * Optimize ELFv2 large code model entry point if * the TOC is within 2GB range of current location. @@ -728,6 +1053,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | (value & 0xffff); break; +#ifdef CONFIG_PPC_KERNEL_PCREL + case R_PPC64_GOT_PCREL34: + value = got_for_addr(sechdrs, value, me, + strtab + sym->st_name); + if (!value) + return -ENOENT; + value -= (unsigned long)location; + ((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) | + ((value >> 16) & 0x3ffff); + ((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) | + (value & 0xffff); + break; +#endif + default: pr_err("%s: Unknown ADD relocation: %lu\n", me->name, @@ -776,6 +1115,37 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return 0; } +static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + unsigned int i, total_stubs, num_stubs; + struct ppc64_stub_entry *stub; + + total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub); + num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry); + + /* Find the next available entry */ + stub = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stub_func_addr(stub[i].funcdata); i++) + if (WARN_ON(i >= total_stubs)) + return -1; + + if (WARN_ON(i + num_stubs > total_stubs)) + return -1; + + stub += i; + me->arch.ool_stubs = (struct ftrace_ool_stub *)stub; + + /* reserve stubs */ + for (i = 0; i < num_stubs; i++) + if (patch_u32((void *)&stub->funcdata, PPC_RAW_NOP())) + return -1; +#endif + + return 0; +} + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.tramp = stub_for_addr(sechdrs, @@ -794,6 +1164,9 @@ int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) if (!mod->arch.tramp) return -ENOENT; + if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod)) + return -ENOENT; + return 0; } #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index e385d3164648..f9c6568a9137 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -73,7 +73,7 @@ static const char *nvram_os_partitions[] = { }; static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason); + struct kmsg_dump_detail *detail); static struct kmsg_dumper nvram_kmsg_dumper = { .dump = oops_to_nvram @@ -643,7 +643,7 @@ void __init nvram_init_oops_partition(int rtas_partition_exists) * partition. If that's too much, go back and capture uncompressed text. 
*/ static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; @@ -655,7 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; int rc = -1; - switch (reason) { + switch (detail->reason) { case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. */ return; @@ -671,7 +671,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, break; default: pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", - __func__, (int) reason); + __func__, (int) detail->reason); return; } diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c deleted file mode 100644 index f89376ff633e..000000000000 --- a/arch/powerpc/kernel/of_platform.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * and Arnd Bergmann, IBM Corp. - */ - -#undef DEBUG - -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/mod_devicetable.h> -#include <linux/pci.h> -#include <linux/of.h> -#include <linux/of_device.h> -#include <linux/of_platform.h> -#include <linux/atomic.h> - -#include <asm/errno.h> -#include <asm/topology.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> -#include <asm/eeh.h> - -#ifdef CONFIG_PPC_OF_PLATFORM_PCI - -/* The probing of PCI controllers from of_platform is currently - * 64 bits only, mostly due to gratuitous differences between - * the 32 and 64 bits PCI code on PowerPC and the 32 bits one - * lacking some bits needed here. - */ - -static int of_pci_phb_probe(struct platform_device *dev) -{ - struct pci_controller *phb; - - /* Check if we can do that ... */ - if (ppc_md.pci_setup_phb == NULL) - return -ENODEV; - - pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node); - - /* Alloc and setup PHB data structure */ - phb = pcibios_alloc_controller(dev->dev.of_node); - if (!phb) - return -ENODEV; - - /* Setup parent in sysfs */ - phb->parent = &dev->dev; - - /* Setup the PHB using arch provided callback */ - if (ppc_md.pci_setup_phb(phb)) { - pcibios_free_controller(phb); - return -ENODEV; - } - - /* Process "ranges" property */ - pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0); - - /* Init pci_dn data structures */ - pci_devs_phb_init_dynamic(phb); - - /* Create EEH PE for the PHB */ - eeh_phb_pe_create(phb); - - /* Scan the bus */ - pcibios_scan_phb(phb); - if (phb->bus == NULL) - return -ENXIO; - - /* Claim resources. This might need some rework as well depending - * whether we are doing probe-only or not, like assigning unassigned - * resources etc... 
- */ - pcibios_claim_one_bus(phb->bus); - - /* Add probed PCI devices to the device model */ - pci_bus_add_devices(phb->bus); - - return 0; -} - -static const struct of_device_id of_pci_phb_ids[] = { - { .type = "pci", }, - { .type = "pcix", }, - { .type = "pcie", }, - { .type = "pciex", }, - { .type = "ht", }, - {} -}; - -static struct platform_driver of_pci_phb_driver = { - .probe = of_pci_phb_probe, - .driver = { - .name = "of-pci", - .of_match_table = of_pci_phb_ids, - }, -}; - -builtin_platform_driver(of_pci_phb_driver); - -#endif /* CONFIG_PPC_OF_PLATFORM_PCI */ diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 3b1c2236cbee..2e83702bf9ba 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -13,7 +13,7 @@ #include <asm/kprobes.h> #include <asm/ptrace.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/inst.h> @@ -56,7 +56,7 @@ static unsigned long can_optimize(struct kprobe *p) * has a 'nop' instruction, which can be emulated. * So further checks can be skipped. */ - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return addr + sizeof(kprobe_opcode_t); /* @@ -112,7 +112,7 @@ static void optimized_callback(struct optimized_kprobe *op, __this_cpu_write(current_kprobe, NULL); } - preempt_enable_no_resched(); + preempt_enable(); } NOKPROBE_SYMBOL(optimized_callback); diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 5c7f0b4b784b..35932f45fb4e 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -73,7 +73,7 @@ optprobe_template_entry: * further below. */ #ifdef CONFIG_PPC64 - ld r2,PACATOC(r13) + LOAD_PACA_TOC() #endif .global optprobe_template_op_address @@ -85,7 +85,7 @@ optprobe_template_op_address: TEMPLATE_FOR_IMM_LOAD_INSNS /* 2. pt_regs pointer in r4 */ - addi r4,r1,STACK_FRAME_OVERHEAD + addi r4,r1,STACK_INT_FRAME_REGS .global optprobe_template_call_handler optprobe_template_call_handler: @@ -96,7 +96,7 @@ optprobe_template_call_handler: * Parameters for instruction emulation: * 1. Pass SP in register r3. */ - addi r3,r1,STACK_FRAME_OVERHEAD + addi r3,r1,STACK_INT_FRAME_REGS .global optprobe_template_insn optprobe_template_insn: diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index ba593fd60124..7502066c3c53 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -16,7 +16,6 @@ #include <asm/kexec.h> #include <asm/svm.h> #include <asm/ultravisor.h> -#include <asm/rtas.h> #include "setup.h" @@ -69,7 +68,7 @@ static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit, memblock_set_bottom_up(true); /* - * See Documentation/powerpc/ultravisor.rst for more details. + * See Documentation/arch/powerpc/ultravisor.rst for more details. * * UV/HV data sharing is in PAGE_SIZE granularity. 
In order to * minimize the number of pages shared, align the allocation to @@ -170,30 +169,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) } #endif /* CONFIG_PPC_64S_HASH_MMU */ -#ifdef CONFIG_PPC_PSERIES -/** - * new_rtas_args() - Allocates rtas args - * @cpu: CPU number - * @limit: Memory limit for this allocation - * - * Allocates a struct rtas_args and return it's pointer, - * if not in Hypervisor mode - * - * Return: Pointer to allocated rtas_args - * NULL if CPU in Hypervisor Mode - */ -static struct rtas_args * __init new_rtas_args(int cpu, unsigned long limit) -{ - limit = min_t(unsigned long, limit, RTAS_INSTANTIATE_MAX); - - if (early_cpu_has_feature(CPU_FTR_HVMODE)) - return NULL; - - return alloc_paca_data(sizeof(struct rtas_args), L1_CACHE_BYTES, - limit, cpu); -} -#endif /* CONFIG_PPC_PSERIES */ - /* The Paca is an array with one entry per processor. Each contains an * lppaca, which contains the information shared between the * hypervisor and Linux. @@ -211,12 +186,14 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #ifdef CONFIG_PPC_PSERIES new_paca->lppaca_ptr = NULL; #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 new_paca->kernel_pgd = swapper_pg_dir; #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; +#ifndef CONFIG_PPC_KERNEL_PCREL new_paca->kernel_toc = kernel_toc_addr(); +#endif new_paca->kernelbase = (unsigned long) _stext; /* Only set MSR:IR/DR when MMU is initialized */ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); @@ -228,14 +205,10 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->slb_shadow_ptr = NULL; #endif -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* For now -- if we have threads this will be adjusted later */ new_paca->tcd_ptr = &new_paca->tcd; #endif - -#ifdef CONFIG_PPC_PSERIES - new_paca->rtas_args_reentrant = NULL; -#endif } /* Put the paca pointer into r13 and SPRG_PACA */ @@ -244,7 +217,7 @@ void setup_paca(struct paca_struct *new_paca) /* Setup r13 */ local_paca = new_paca; -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* On Book3E, initialize the TLB miss exception frames */ mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); #else @@ -308,9 +281,6 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_64S_HASH_MMU paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif -#ifdef CONFIG_PPC_PSERIES - paca->rtas_args_reentrant = new_rtas_args(cpu, limit); -#endif paca_struct_size += sizeof(struct paca_struct); } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 068410cd54a3..eac84d687b53 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -39,6 +39,7 @@ #include <asm/machdep.h> #include <asm/ppc-pci.h> #include <asm/eeh.h> +#include <asm/setup.h> #include "../../../drivers/pci/pci.h" @@ -67,23 +68,35 @@ void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) pci_dma_ops = dma_ops; } -/* - * This function should run under locking protection, specifically - * hose_spinlock. - */ static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; - u32 prop_32; u64 prop; /* * Try fixed PHB numbering first, by checking archs and reading - * the respective device-tree properties. Firstly, try powernv by - * reading "ibm,opal-phbid", only present in OPAL environment. + * the respective device-tree properties. 
Firstly, try reading + * standard "linux,pci-domain", then try reading "ibm,opal-phbid" + * (only present in powernv OPAL environment), then try device-tree + * alias and as the last try to use lower bits of "reg" property. */ - ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + ret = of_get_pci_domain_nr(dn); + if (ret >= 0) { + prop = ret; + ret = 0; + } + if (ret) + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + + if (ret) { + ret = of_alias_get_id(dn, "pci"); + if (ret >= 0) { + prop = ret; + ret = 0; + } + } if (ret) { + u32 prop_32; ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); prop = prop_32; } @@ -91,18 +104,20 @@ static int get_phb_number(struct device_node *dn) if (!ret) phb_id = (int)(prop & (MAX_PHBS - 1)); + spin_lock(&hose_spinlock); + /* We need to be sure to not use the same PHB number twice. */ if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) - return phb_id; + goto out_unlock; - /* - * If not pseries nor powernv, or if fixed PHB numbering tried to add - * the same PHB number twice, then fallback to dynamic PHB numbering. - */ + /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); +out_unlock: + spin_unlock(&hose_spinlock); + return phb_id; } @@ -110,14 +125,17 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; - phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); + phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL); if (phb == NULL) return NULL; - spin_lock(&hose_spinlock); + phb->global_number = get_phb_number(dev); + + spin_lock(&hose_spinlock); list_add_tail(&phb->list_node, &hose_list); spin_unlock(&hose_spinlock); - phb->dn = dev; + + phb->dn = of_node_get(dev); phb->is_dynamic = slab_is_available(); #ifdef CONFIG_PPC64 if (dev) { @@ -140,7 +158,7 @@ void pcibios_free_controller(struct pci_controller *phb) /* Clear bit of phb_bitmap to allow reuse of this PHB number. 
*/ if (phb->global_number < MAX_PHBS) clear_bit(phb->global_number, phb_bitmap); - + of_node_put(phb->dn); list_del(&phb->list_node); spin_unlock(&hose_spinlock); @@ -499,12 +517,11 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) } /* - * This one is used by /dev/mem and fbdev who have no clue about the + * This one is used by /dev/mem and video who have no clue about the * PCI device, it tries to find the PCI device first and calls the * above routine */ -pgprot_t pci_phys_mem_access_prot(struct file *file, - unsigned long pfn, +pgprot_t pci_phys_mem_access_prot(unsigned long pfn, unsigned long size, pgprot_t prot) { @@ -862,6 +879,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) static void pcibios_fixup_resources(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct resource *res; int i; if (!hose) { @@ -873,9 +891,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev) if (dev->is_virtfn) return; - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - struct resource *res = dev->resource + i; + pci_dev_for_each_resource(dev, res, i) { struct pci_bus_region reg; + if (!res->flags) continue; @@ -1087,7 +1105,7 @@ void pcibios_fixup_bus(struct pci_bus *bus) */ pci_read_bridge_bases(bus); - /* Now fixup the bus bus */ + /* Now fixup the bus */ pcibios_setup_bus_self(bus); } EXPORT_SYMBOL(pcibios_fixup_bus); @@ -1434,11 +1452,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus) struct pci_bus *child_bus; list_for_each_entry(dev, &bus->devices, bus_list) { + struct resource *r; int i; - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r = &dev->resource[i]; - + pci_dev_for_each_resource(dev, r, i) { if (r->parent || !r->start || !r->flags) continue; @@ -1687,19 +1704,20 @@ EXPORT_SYMBOL_GPL(pcibios_scan_phb); static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { - int i, class = dev->class >> 8; + int class = dev->class >> 8; /* When configured as agent, programming interface = 1 */ int prog_if = dev->class & 0xf; + struct resource *r; if ((class == PCI_CLASS_PROCESSOR_POWERPC || class == PCI_CLASS_BRIDGE_OTHER) && (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && (prog_if == 0) && (dev->bus->parent == NULL)) { - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; + pci_dev_for_each_resource(dev, r) { + r->start = 0; + r->end = 0; + r->flags = 0; } } } diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 0fe251c6ac2c..6f444d0822d8 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -93,6 +93,36 @@ void pci_hp_remove_devices(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(pci_hp_remove_devices); +static void traverse_siblings_and_scan_slot(struct device_node *start, struct pci_bus *bus) +{ + struct device_node *dn; + int slotno; + + u32 class = 0; + + if (!of_property_read_u32(start->child, "class-code", &class)) { + /* Call of pci_scan_slot for non-bridge/EP case */ + if (!((class >> 8) == PCI_CLASS_BRIDGE_PCI)) { + slotno = PCI_SLOT(PCI_DN(start->child)->devfn); + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + return; + } + } + + /* Iterate all siblings */ + for_each_child_of_node(start, dn) { + class = 0; + + if (!of_property_read_u32(start->child, "class-code", &class)) { + /* Call of pci_scan_slot on each sibling-nodes/bridge-ports */ + if ((class >> 8) == PCI_CLASS_BRIDGE_PCI) { + slotno = PCI_SLOT(PCI_DN(dn)->devfn); + pci_scan_slot(bus, 
PCI_DEVFN(slotno, 0)); + } + } + } +} + /** * pci_hp_add_devices - adds new pci devices to bus * @bus: the indicated PCI bus @@ -106,11 +136,14 @@ EXPORT_SYMBOL_GPL(pci_hp_remove_devices); */ void pci_hp_add_devices(struct pci_bus *bus) { - int slotno, mode, max; + int mode, max; struct pci_dev *dev; struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); + if (!dn) + return; + phb = pci_bus_to_host(bus); mode = PCI_PROBE_NORMAL; @@ -129,8 +162,7 @@ void pci_hp_add_devices(struct pci_bus *bus) * order for fully rescan all the way down to pick them up. * They can have been removed during partial hotplug. */ - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + traverse_siblings_and_scan_slot(dn, bus); max = bus->busn_res.start; /* * Scan bridges that are already configured. We don't touch diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 5a174936c9a0..f8a3bd8cfae4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -36,18 +36,13 @@ int pcibios_assign_bus_offset = 1; EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); -void __init pcibios_make_OF_bus_map(void); - static void fixup_cpc710_pci64(struct pci_dev* dev); -static u8* pci_to_OF_bus_map; /* By default, we don't re-assign bus numbers. We do this only on * some pmacs */ static int pci_assign_all_buses; -static int pci_bus_count; - /* This will remain NULL for now, until isa-bridge.c is made common * to both 32-bit and 64-bit. */ @@ -67,6 +62,11 @@ fixup_cpc710_pci64(struct pci_dev* dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64); +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP + +static u8* pci_to_OF_bus_map; +static int pci_bus_count; + /* * Functions below are used on OpenFirmware machines. */ @@ -108,7 +108,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus) } } -void __init +static void __init pcibios_make_OF_bus_map(void) { int i; @@ -152,14 +152,18 @@ pcibios_make_OF_bus_map(void) } #endif } +#endif // CONFIG_PPC_PCI_OF_BUS_MAP +#ifdef CONFIG_PPC_PMAC /* * Returns the PCI device matching a given OF node */ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) { +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP struct pci_dev *dev = NULL; +#endif const __be32 *reg; int size; @@ -174,6 +178,9 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) *bus = (be32_to_cpup(®[0]) >> 16) & 0xff; *devfn = (be32_to_cpup(®[0]) >> 8) & 0xff; +#ifndef CONFIG_PPC_PCI_OF_BUS_MAP + return 0; +#else /* Ok, here we need some tweak. If we have already renumbered * all busses, we can't rely on the OF bus number any more. 
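The *bus/*devfn extraction just above unpacks the first big-endian "reg" cell of an OF PCI node: bits 23:16 carry the bus number and bits 15:8 the devfn. A stand-alone sketch of the same decoding, with a made-up cell value:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t reg0  = 0x00021000;		/* hypothetical first "reg" cell */
	uint8_t  bus   = (reg0 >> 16) & 0xff;	/* 0x02 */
	uint8_t  devfn = (reg0 >> 8) & 0xff;	/* 0x10: device 2, function 0 */

	printf("bus %u devfn %u (device %u function %u)\n",
	       bus, devfn, devfn >> 3, devfn & 7);
	return 0;
}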
* the pci_to_OF_bus_map is not enough as several PCI busses @@ -191,9 +198,12 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn) } return -ENODEV; +#endif // CONFIG_PPC_PCI_OF_BUS_MAP } EXPORT_SYMBOL(pci_device_from_OF_node); +#endif +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP /* We create the "pci-OF-bus-map" property now so it appears in the * /proc device tree */ @@ -203,11 +213,8 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = memblock_alloc(sizeof(struct property) + 256, + of_prop = memblock_alloc_or_panic(sizeof(struct property) + 256, SMP_CACHE_BYTES); - if (!of_prop) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct property) + 256); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); @@ -218,6 +225,7 @@ pci_create_OF_bus_map(void) of_node_put(dn); } } +#endif // CONFIG_PPC_PCI_OF_BUS_MAP void pcibios_setup_phb_io_space(struct pci_controller *hose) { @@ -233,23 +241,41 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose) static int __init pcibios_init(void) { struct pci_controller *hose, *tmp; +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT int next_busno = 0; +#endif printk(KERN_INFO "PCI: Probing PCI hardware\n"); +#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT + /* + * Enable PCI domains in /proc when PCI bus numbers are not unique + * across all PCI domains to prevent conflicts. And keep PCI domain 0 + * backward compatible in /proc for video cards. + */ + pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0); +#endif + if (pci_has_flag(PCI_REASSIGN_ALL_BUS)) pci_assign_all_buses = 1; /* Scan all of the recorded PCI controllers. */ list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses) hose->first_busno = next_busno; +#endif hose->last_busno = 0xff; pcibios_scan_phb(hose); pci_bus_add_devices(hose->bus); +#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT if (pci_assign_all_buses || next_busno <= hose->last_busno) next_busno = hose->last_busno + pcibios_assign_bus_offset; +#endif } + +#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP) +#ifdef CONFIG_PPC_PCI_OF_BUS_MAP pci_bus_count = next_busno; /* OpenFirmware based machines need a map of OF bus @@ -258,6 +284,8 @@ static int __init pcibios_init(void) */ if (pci_assign_all_buses) pcibios_make_OF_bus_map(); +#endif +#endif /* Call common code to handle resource allocation */ pcibios_resource_survey(); diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 19b03ddf5631..e27342ef128b 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -73,7 +73,7 @@ static int __init pcibios_init(void) return 0; } -subsys_initcall(pcibios_init); +subsys_initcall_sync(pcibios_init); int pcibios_unmap_io_space(struct pci_bus *bus) { @@ -132,7 +132,7 @@ void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size) * address decoding but I'd rather not deal with those outside of the * reserved 64K legacy region. 
*/ - area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END, + area = __get_vm_area_caller(size, VM_IOREMAP, PHB_IO_BASE, PHB_IO_END, __builtin_return_address(0)); if (!area) return NULL; @@ -286,6 +286,7 @@ int pcibus_to_node(struct pci_bus *bus) EXPORT_SYMBOL(pcibus_to_node); #endif +#ifdef CONFIG_PPC_PMAC int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) { if (!PCI_DN(np)) @@ -294,3 +295,4 @@ int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn) *devfn = PCI_DN(np)->devfn; return 0; } +#endif diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 938ab8838ab5..38561d6a2079 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -259,7 +259,7 @@ void remove_sriov_vf_pdns(struct pci_dev *pdev) if (edev) { /* * We allocate pci_dn's for the totalvfs count, - * but only only the vfs that were activated + * but only the vfs that were activated * have a configured PE. */ if (edev->pe) @@ -330,6 +330,7 @@ struct pci_dn *pci_add_device_node_info(struct pci_controller *hose, INIT_LIST_HEAD(&pdn->list); parent = of_get_parent(dn); pdn->parent = parent ? PCI_DN(parent) : NULL; + of_node_put(parent); if (pdn->parent) list_add_tail(&pdn->list, &pdn->parent->child_list); diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 15414c8a2837..9fabb4d9235e 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -74,7 +74,7 @@ void release_pmc_hardware(void) } EXPORT_SYMBOL_GPL(release_pmc_hardware); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 void power4_enable_pmcs(void) { unsigned long hid0; diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h deleted file mode 100644 index 2346f8c7ff2e..000000000000 --- a/arch/powerpc/kernel/ppc32.h +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _PPC64_PPC32_H -#define _PPC64_PPC32_H - -#include <linux/compat.h> -#include <asm/siginfo.h> -#include <asm/signal.h> - -/* - * Data types and macros for providing 32b PowerPC support. - */ - -/* These are here to support 32-bit syscalls on a 64-bit kernel. */ - -struct pt_regs32 { - unsigned int gpr[32]; - unsigned int nip; - unsigned int msr; - unsigned int orig_gpr3; /* Used for restarting system calls */ - unsigned int ctr; - unsigned int link; - unsigned int xer; - unsigned int ccr; - unsigned int mq; /* 601 only (not used at present) */ - unsigned int trap; /* Reason for being here */ - unsigned int dar; /* Fault registers */ - unsigned int dsisr; - unsigned int result; /* Result of a system call */ -}; - -struct sigcontext32 { - unsigned int _unused[4]; - int signal; - compat_uptr_t handler; - unsigned int oldmask; - compat_uptr_t regs; /* 4 byte pointer to the pt_regs32 structure. 
*/ -}; - -struct mcontext32 { - elf_gregset_t32 mc_gregs; - elf_fpregset_t mc_fregs; - unsigned int mc_pad[2]; - elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16))); - elf_vsrreghalf_t32 mc_vsregs __attribute__((__aligned__(16))); -}; - -struct ucontext32 { - unsigned int uc_flags; - unsigned int uc_link; - compat_stack_t uc_stack; - int uc_pad[7]; - compat_uptr_t uc_regs; /* points to uc_mcontext field */ - compat_sigset_t uc_sigmask; /* mask last for extensibility */ - /* glibc has 1024-bit signal masks, ours are 64-bit */ - int uc_maskext[30]; - int uc_pad2[3]; - struct mcontext32 uc_mcontext; -}; - -#endif /* _PPC64_PPC32_H */ diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index 2d4d21bb46a9..a9b9c32d0c1f 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -21,60 +21,33 @@ * different ABIs, though). */ _GLOBAL(ppc_save_regs) - PPC_STL r0,0*SZL(r3) + /* This allows stack frame accessor macros and offsets to be used */ + subi r3,r3,STACK_INT_FRAME_REGS + PPC_STL r0,GPR0(r3) #ifdef CONFIG_PPC32 - stmw r2, 2*SZL(r3) + stmw r2,GPR2(r3) #else - PPC_STL r2,2*SZL(r3) - PPC_STL r3,3*SZL(r3) - PPC_STL r4,4*SZL(r3) - PPC_STL r5,5*SZL(r3) - PPC_STL r6,6*SZL(r3) - PPC_STL r7,7*SZL(r3) - PPC_STL r8,8*SZL(r3) - PPC_STL r9,9*SZL(r3) - PPC_STL r10,10*SZL(r3) - PPC_STL r11,11*SZL(r3) - PPC_STL r12,12*SZL(r3) - PPC_STL r13,13*SZL(r3) - PPC_STL r14,14*SZL(r3) - PPC_STL r15,15*SZL(r3) - PPC_STL r16,16*SZL(r3) - PPC_STL r17,17*SZL(r3) - PPC_STL r18,18*SZL(r3) - PPC_STL r19,19*SZL(r3) - PPC_STL r20,20*SZL(r3) - PPC_STL r21,21*SZL(r3) - PPC_STL r22,22*SZL(r3) - PPC_STL r23,23*SZL(r3) - PPC_STL r24,24*SZL(r3) - PPC_STL r25,25*SZL(r3) - PPC_STL r26,26*SZL(r3) - PPC_STL r27,27*SZL(r3) - PPC_STL r28,28*SZL(r3) - PPC_STL r29,29*SZL(r3) - PPC_STL r30,30*SZL(r3) - PPC_STL r31,31*SZL(r3) + SAVE_GPRS(2, 31, r3) lbz r0,PACAIRQSOFTMASK(r13) - PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,SOFTE(r3) #endif - /* go up one stack frame for SP */ - PPC_LL r4,0(r1) - PPC_STL r4,1*SZL(r3) + /* store current SP */ + PPC_STL r1,GPR1(r3) /* get caller's LR */ + PPC_LL r4,0(r1) PPC_LL r0,LRSAVE(r4) - PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_LINK(r3) mflr r0 - PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_NIP(r3) mfmsr r0 - PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_MSR(r3) mfctr r0 - PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CTR(r3) mfxer r0 - PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_XER(r3) mfcr r0 - PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_CCR(r3) li r0,0 - PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) - PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,_TRAP(r3) + PPC_STL r0,ORIG_GPR3(r3) blr diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index b109cd7b5d01..d083b4517065 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -4,17 +4,20 @@ */ #include <linux/init.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/of.h> +#include <linux/string.h> #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> +#include <asm/systemcfg.h> #include <linux/uaccess.h> -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG static loff_t page_map_seek(struct file *file, loff_t off, int whence) { @@ -33,10 +36,9 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) if 
((vma->vm_end - vma->vm_start) > PAGE_SIZE) return -EINVAL; - remap_pfn_range(vma, vma->vm_start, - __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot); - return 0; + return remap_pfn_range(vma, vma->vm_start, + __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); } static const struct proc_ops page_map_proc_ops = { @@ -45,13 +47,35 @@ static const struct proc_ops page_map_proc_ops = { .proc_mmap = page_map_mmap, }; +static union { + struct systemcfg data; + u8 page[PAGE_SIZE]; +} systemcfg_data_store __page_aligned_data; +struct systemcfg *systemcfg = &systemcfg_data_store.data; static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; + strscpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + systemcfg->processor = mfspr(SPRN_PVR); + /* + * Fake the old platform number for pSeries and add + * in LPAR bit if necessary + */ + systemcfg->platform = 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + systemcfg->platform |= 1; + systemcfg->physicalMemorySize = memblock_phys_mem_size(); + systemcfg->dcache_size = ppc64_caches.l1d.size; + systemcfg->dcache_line_size = ppc64_caches.l1d.line_size; + systemcfg->icache_size = ppc64_caches.l1i.size; + systemcfg->icache_line_size = ppc64_caches.l1i.line_size; + pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, - &page_map_proc_ops, vdso_data); + &page_map_proc_ops, systemcfg); if (!pde) return 1; proc_set_size(pde, PAGE_SIZE); @@ -60,7 +84,7 @@ static int __init proc_ppc64_init(void) } __initcall(proc_ppc64_init); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC64_PROC_SYSTEMCFG */ /* * Create the ppc64 and ppc64/rtas directories early. This allows us to diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 0fbda89cd1bb..855e09886503 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -54,7 +54,7 @@ #include <asm/firmware.h> #include <asm/hw_irq.h> #endif -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/exec.h> #include <asm/livepatch.h> #include <asm/cpu_has_feature.h> @@ -72,8 +72,6 @@ #define TM_DEBUG(x...) do { } while(0) #endif -extern unsigned long _get_SP(void); - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Are we running in "Suspend disabled" mode? 
If so we have to block any @@ -127,7 +125,7 @@ unsigned long notrace msr_check_and_set(unsigned long bits) newmsr |= MSR_VSX; if (oldmsr != newmsr) - mtmsr_isync(newmsr); + newmsr = mtmsr_isync_irqsafe(newmsr); return newmsr; } @@ -145,7 +143,7 @@ void notrace __msr_check_and_clear(unsigned long bits) newmsr &= ~MSR_VSX; if (oldmsr != newmsr) - mtmsr_isync(newmsr); + mtmsr_isync_irqsafe(newmsr); } EXPORT_SYMBOL(__msr_check_and_clear); @@ -862,10 +860,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) return 0; } -void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); - if (dawr_enabled()) // Power8 or later set_dawr(nr, brk); @@ -879,6 +875,12 @@ void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) WARN_ON_ONCE(1); } +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +{ + memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); + set_hw_breakpoint(nr, brk); +} + /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { @@ -891,6 +893,34 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); +/* Disable the breakpoint in hardware without touching current_brk[] */ +void suspend_breakpoints(void) +{ + struct arch_hw_breakpoint brk = {0}; + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, &brk); +} + +/* + * Re-enable breakpoints suspended by suspend_breakpoints() in hardware + * from current_brk[] + */ +void restore_breakpoints(void) +{ + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, this_cpu_ptr(¤t_brk[i])); +} + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) @@ -970,7 +1000,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) WARN_ON(tm_suspend_disabled); - TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " + TM_DEBUG("---- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", tsk->pid, thr->regs->nip, thr->regs->ccr, thr->regs->msr, @@ -978,7 +1008,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) tm_reclaim_thread(thr, TM_CAUSE_RESCHED); - TM_DEBUG("--- tm_reclaim on pid %d complete\n", + TM_DEBUG("---- tm_reclaim on pid %d complete\n", tsk->pid); out_and_saveregs: @@ -1150,6 +1180,12 @@ static inline void save_sprs(struct thread_struct *t) */ t->tar = mfspr(SPRN_TAR); } + + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) + t->hashkeyr = mfspr(SPRN_HASHKEYR); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + t->dexcr = mfspr(SPRN_DEXCR); #endif } @@ -1163,11 +1199,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { @@ -1228,6 +1264,14 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (cpu_has_feature(CPU_FTR_P9_TIDR) && old_thread->tidr != new_thread->tidr) mtspr(SPRN_TIDR, new_thread->tidr); + + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && + old_thread->hashkeyr != new_thread->hashkeyr) + mtspr(SPRN_HASHKEYR, new_thread->hashkeyr); + + if (cpu_has_feature(CPU_FTR_ARCH_31) && + old_thread->dexcr != new_thread->dexcr) + mtspr(SPRN_DEXCR, new_thread->dexcr); #endif } 
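The DEXCR/HASHKEYR additions to save_sprs()/restore_sprs() above follow the usual SPR context-switch pattern: snapshot the register into the thread struct on switch-out, and write it back on switch-in only when the incoming value differs, avoiding a needless SPR write. A user-space sketch of the pattern with a mocked SPR; the real code additionally gates both registers on cpu_has_feature():

#include <stdio.h>

static unsigned long fake_spr;		/* stands in for SPRN_DEXCR */
static unsigned long mfspr(void)	{ return fake_spr; }
static void mtspr(unsigned long v)	{ fake_spr = v; }

struct thread { unsigned long dexcr; };

static void save_sprs(struct thread *t)
{
	t->dexcr = mfspr();		/* snapshot on switch-out */
}

static void restore_sprs(const struct thread *old_thread,
			 const struct thread *new_thread)
{
	if (old_thread->dexcr != new_thread->dexcr)
		mtspr(new_thread->dexcr);	/* skip the write when unchanged */
}

int main(void)
{
	struct thread a = { .dexcr = 1 }, b = { .dexcr = 2 };

	mtspr(a.dexcr);
	save_sprs(&a);			/* switch a out ... */
	restore_sprs(&a, &b);		/* ... and b in */
	printf("spr now %lu\n", fake_spr);
	return 0;
}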
@@ -1359,7 +1403,7 @@ static void show_instructions(struct pt_regs *regs) unsigned long nip = regs->nip; unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int)); - printk("Instruction dump:"); + printk("Code: "); /* * If we were executing with the MMU off for instructions, adjust pc @@ -1373,11 +1417,7 @@ static void show_instructions(struct pt_regs *regs) for (i = 0; i < NR_INSN_TO_PRINT; i++) { int instr; - if (!(i % 8)) - pr_cont("\n"); - - if (!__kernel_text_address(pc) || - get_kernel_nofault(instr, (const void *)pc)) { + if (get_kernel_nofault(instr, (const void *)pc)) { pr_cont("XXXXXXXX "); } else { if (nip == pc) @@ -1531,7 +1571,7 @@ static void __show_regs(struct pt_regs *regs) if (trap == INTERRUPT_MACHINE_CHECK || trap == INTERRUPT_DATA_STORAGE || trap == INTERRUPT_ALIGNMENT) { - if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) + if (IS_ENABLED(CONFIG_BOOKE)) pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr); else pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); @@ -1599,10 +1639,17 @@ void arch_setup_new_exec(void) current->thread.regs->amr = default_amr; current->thread.regs->iamr = default_iamr; #endif + +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + current->thread.dexcr = current->thread.dexcr_onexec; + mtspr(SPRN_DEXCR, current->thread.dexcr); + } +#endif /* CONFIG_PPC_BOOK3S_64 */ } #ifdef CONFIG_PPC64 -/** +/* * Assign a TIDR (thread ID) for task @t and set it in the thread * structure. For now, we only support setting TIDR for 'current' task. * @@ -1612,7 +1659,7 @@ void arch_setup_new_exec(void) * cases will happen: * * 1. The correct thread is running, the wrong thread is not - * In this situation, the correct thread is woken and proceeds to pass it's + * In this situation, the correct thread is woken and proceeds to pass its * condition check. * * 2. Neither threads are running @@ -1622,15 +1669,15 @@ void arch_setup_new_exec(void) * for the wrong thread, or they will execute the condition check immediately. * * 3. The wrong thread is running, the correct thread is not - * The wrong thread will be woken, but will fail it's condition check and + * The wrong thread will be woken, but will fail its condition check and * re-execute wait. The correct thread, when scheduled, will execute either - * it's condition check (which will pass), or wait, which returns immediately - * when called the first time after the thread is scheduled, followed by it's + * its condition check (which will pass), or wait, which returns immediately + * when called the first time after the thread is scheduled, followed by its * condition check (which will pass). * * 4. Both threads are running - * Both threads will be woken. The wrong thread will fail it's condition check - * and execute another wait, while the correct thread will pass it's condition + * Both threads will be woken. The wrong thread will fail its condition check + * and execute another wait, while the correct thread will pass its condition * check. * * @t: the task to set the thread ID for @@ -1655,11 +1702,6 @@ EXPORT_SYMBOL_GPL(set_thread_tidr); #endif /* CONFIG_PPC64 */ -void -release_thread(struct task_struct *t) -{ -} - /* * this gets called so that we can store coprocessor state into memory and * copy the current task into the new thread. 
@@ -1715,64 +1757,83 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) */ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { - unsigned long clone_flags = args->flags; - unsigned long usp = args->stack; - unsigned long tls = args->tls; - struct pt_regs *childregs, *kregs; + struct pt_regs *kregs; /* Switch frame regs */ extern void ret_from_fork(void); extern void ret_from_fork_scv(void); - extern void ret_from_kernel_thread(void); + extern void ret_from_kernel_user_thread(void); + extern void start_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; - struct thread_info *ti = task_thread_info(p); #ifdef CONFIG_HAVE_HW_BREAKPOINT int i; #endif klp_init_thread_info(p); - /* Copy registers */ - sp -= sizeof(struct pt_regs); - childregs = (struct pt_regs *) sp; - if (unlikely(args->fn)) { + if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ - memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + sizeof(struct pt_regs); - /* function */ - if (args->fn) - childregs->gpr[14] = ppc_function_entry((void *)args->fn); -#ifdef CONFIG_PPC64 - clear_tsk_thread_flag(p, TIF_32BIT); - childregs->softe = IRQS_ENABLED; -#endif - childregs->gpr[15] = (unsigned long)args->fn_arg; + + /* Create initial minimum stack frame. */ + sp -= STACK_FRAME_MIN_SIZE; + ((unsigned long *)sp)[0] = 0; + + f = start_kernel_thread; p->thread.regs = NULL; /* no user register state */ - ti->flags |= _TIF_RESTOREALL; - f = ret_from_kernel_thread; + clear_tsk_compat_task(p); } else { /* user thread */ - struct pt_regs *regs = current_pt_regs(); - *childregs = *regs; - if (usp) - childregs->gpr[1] = usp; - p->thread.regs = childregs; - /* 64s sets this in ret_from_fork */ - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) - childregs->gpr[3] = 0; /* Result from fork() */ - if (clone_flags & CLONE_SETTLS) { - if (!is_32bit_task()) - childregs->gpr[13] = tls; + struct pt_regs *childregs; + + /* Create initial user return stack frame. */ + sp -= STACK_USER_INT_FRAME_SIZE; + *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; + + childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); + + if (unlikely(args->fn)) { + /* + * A user space thread, but it first runs a kernel + * thread, and then returns as though it had called + * execve rather than fork, so user regs will be + * filled in (e.g., by kernel_execve()). 
+ */ + ((unsigned long *)sp)[0] = 0; + memset(childregs, 0, sizeof(struct pt_regs)); +#ifdef CONFIG_PPC64 + childregs->softe = IRQS_ENABLED; +#endif + f = ret_from_kernel_user_thread; + } else { + struct pt_regs *regs = current_pt_regs(); + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + + /* Copy registers */ + *childregs = *regs; + if (usp) + childregs->gpr[1] = usp; + ((unsigned long *)sp)[0] = childregs->gpr[1]; +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON_ONCE(childregs->softe != IRQS_ENABLED); +#endif + if (clone_flags & CLONE_SETTLS) { + unsigned long tls = args->tls; + + if (!is_32bit_task()) + childregs->gpr[13] = tls; + else + childregs->gpr[2] = tls; + } + + if (trap_is_scv(regs)) + f = ret_from_fork_scv; else - childregs->gpr[2] = tls; + f = ret_from_fork; } - if (trap_is_scv(regs)) - f = ret_from_fork_scv; - else - f = ret_from_fork; + childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); + p->thread.regs = childregs; } - childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); - sp -= STACK_FRAME_OVERHEAD; /* * The way this works is that at some point in the future @@ -1782,11 +1843,22 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) * do some house keeping and then return from the fork or clone * system call, using the stack frame created above. */ - ((unsigned long *)sp)[0] = 0; - sp -= sizeof(struct pt_regs); - kregs = (struct pt_regs *) sp; - sp -= STACK_FRAME_OVERHEAD; + ((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f; + sp -= STACK_SWITCH_FRAME_SIZE; + ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE; + kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); + kregs->nip = ppc_function_entry(f); + if (unlikely(args->fn)) { + /* + * Put kthread fn, arg parameters in non-volatile GPRs in the + * switch frame so they are loaded by _switch before it returns + * to ret_from_kernel_thread. + */ + kregs->gpr[14] = ppc_function_entry((void *)args->fn); + kregs->gpr[15] = (unsigned long)args->fn_arg; + } p->thread.ksp = sp; + #ifdef CONFIG_HAVE_HW_BREAKPOINT for (i = 0; i < nr_wp_slots(); i++) p->thread.ptrace_bps[i] = NULL; @@ -1801,7 +1873,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) p->thread.kuap = KUAP_NONE; #endif -#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) +#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP) p->thread.pid = MMU_NO_CONTEXT; #endif @@ -1812,22 +1884,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.dscr_inherit = current->thread.dscr_inherit; p->thread.dscr = mfspr(SPRN_DSCR); } - if (cpu_has_feature(CPU_FTR_HAS_PPR)) - childregs->ppr = DEFAULT_PPR; p->thread.tidr = 0; #endif - /* - * Run with the current AMR value of the kernel - */ -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) - kregs->amr = AMR_KUAP_BLOCKED; +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) + p->thread.hashkeyr = current->thread.hashkeyr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) - kregs->iamr = AMR_KUEP_BLOCKED; + if (cpu_has_feature(CPU_FTR_ARCH_31)) + p->thread.dexcr = mfspr(SPRN_DEXCR); #endif - kregs->nip = ppc_function_entry(f); return 0; } @@ -1894,8 +1960,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * address of _start and the second entry is the TOC * value we need to use. 
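On ELFv1 the entry point this comment describes is an OPD function descriptor rather than code: the first word is the address of the function's text and the second is the TOC pointer the function expects in r2, which is why two consecutive words are read below. An illustrative layout (the field names and sample addresses here are ours, not the kernel's):

#include <stdio.h>

struct func_desc {
	unsigned long entry;	/* address of the first instruction */
	unsigned long toc;	/* TOC (r2) value the function expects */
	unsigned long env;	/* environment pointer, unused by Linux */
};

int main(void)
{
	struct func_desc fd = { 0x10000000UL, 0x10080000UL, 0 };

	printf("branch to %#lx with r2 = %#lx\n", fd.entry, fd.toc);
	return 0;
}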
*/ - __get_user(entry, (unsigned long __user *)start); - __get_user(toc, (unsigned long __user *)start+1); + get_user(entry, (unsigned long __user *)start); + get_user(toc, (unsigned long __user *)start+1); /* Check whether the e_entry function descriptor entries * need to be relocated before we can use them. @@ -1944,6 +2010,12 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) { + current->thread.hashkeyr = get_random_long(); + mtspr(SPRN_HASHKEYR, current->thread.hashkeyr); + } +#endif /* CONFIG_PPC_BOOK3S_64 */ } EXPORT_SYMBOL(start_thread); @@ -2089,6 +2161,9 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, unsigned long stack_page; unsigned long cpu = task_cpu(p); + if (!hardirq_ctx[cpu] || !softirq_ctx[cpu]) + return 0; + stack_page = (unsigned long)hardirq_ctx[cpu]; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2100,16 +2175,24 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, return 0; } +#ifdef CONFIG_PPC64 static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, unsigned long nbytes) { -#ifdef CONFIG_PPC64 unsigned long stack_page; unsigned long cpu = task_cpu(p); if (!paca_ptrs) return 0; + if (!paca_ptrs[cpu]->emergency_sp) + return 0; + +# ifdef CONFIG_PPC_BOOK3S_64 + if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp) + return 0; +#endif + stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE; if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; @@ -2123,14 +2206,33 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; # endif -#endif return 0; } +#else +static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page; + unsigned long cpu = task_cpu(p); + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return 0; -int validate_sp(unsigned long sp, struct task_struct *p, - unsigned long nbytes) + stack_page = (unsigned long)emergency_ctx[cpu] - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + return 0; +} +#endif + +/* + * validate the stack frame of a particular minimum size, used for when we are + * looking at a certain object in the stack beyond the minimum. 
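The renamed validate_sp_size() below reduces to a bounds test: the frame pointer must lie within the task's stack region and leave at least nbytes of room before the top. A stand-alone sketch with illustrative constants (THREAD_SIZE and the minimum frame size are configuration dependent):

#include <stdio.h>

#define THREAD_SIZE          (16UL * 1024)	/* assumed stack size */
#define STACK_FRAME_MIN_SIZE 32UL		/* assumed minimum frame */

static int frame_fits(unsigned long sp, unsigned long stack_page,
		      unsigned long nbytes)
{
	return sp >= stack_page &&
	       sp <= stack_page + THREAD_SIZE - nbytes;
}

int main(void)
{
	unsigned long stack_page = 0xc0000000UL;

	/* a frame inside the stack fits, one at the very top does not */
	printf("%d %d\n",
	       frame_fits(stack_page + 0x100, stack_page, STACK_FRAME_MIN_SIZE),
	       frame_fits(stack_page + THREAD_SIZE, stack_page, STACK_FRAME_MIN_SIZE));
	return 0;
}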
+ */ +int validate_sp_size(unsigned long sp, struct task_struct *p, + unsigned long nbytes) { unsigned long stack_page = (unsigned long)task_stack_page(p); @@ -2146,7 +2248,10 @@ int validate_sp(unsigned long sp, struct task_struct *p, return valid_emergency_stack(sp, p, nbytes); } -EXPORT_SYMBOL(validate_sp); +int validate_sp(unsigned long sp, struct task_struct *p) +{ + return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE); +} static unsigned long ___get_wchan(struct task_struct *p) { @@ -2154,13 +2259,12 @@ static unsigned long ___get_wchan(struct task_struct *p) int count = 0; sp = p->thread.ksp; - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, p)) return 0; do { sp = READ_ONCE_NOCHECK(*(unsigned long *)sp); - if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) || - task_is_running(p)) + if (!validate_sp(sp, p) || task_is_running(p)) return 0; if (count > 0) { ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]); @@ -2185,6 +2289,22 @@ unsigned long __get_wchan(struct task_struct *p) return ret; } +static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk) +{ + unsigned long stack_page; + + // A non-empty pt_regs should never have a zero MSR or TRAP value. + if (regs->msr || regs->trap) + return false; + + // Check it sits at the very base of the stack + stack_page = (unsigned long)task_stack_page(tsk); + if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE) + return false; + + return true; +} + static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; void __no_sanitize_address show_stack(struct task_struct *tsk, @@ -2214,7 +2334,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, lr = 0; printk("%sCall Trace:\n", loglvl); do { - if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, tsk)) break; stack = (unsigned long *) sp; @@ -2235,19 +2355,27 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, /* * See if this is an exception frame. - * We look for the "regshere" marker in the current frame. + * We look for the "regs" marker in the current frame. + * + * STACK_SWITCH_FRAME_SIZE being the smallest frame that + * could hold a pt_regs, if that does not fit then it can't + * have regs. */ - if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) - && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE) + && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) - (sp + STACK_FRAME_OVERHEAD); + (sp + STACK_INT_FRAME_REGS); lr = regs->link; - printk("%s--- interrupt: %lx at %pS\n", + printk("%s---- interrupt: %lx at %pS\n", loglvl, regs->trap, (void *)regs->nip); - __show_regs(regs); - printk("%s--- interrupt: %lx\n", - loglvl, regs->trap); + + // Detect the case of an empty pt_regs at the very base + // of the stack and suppress showing it in full. 
+ if (!empty_user_regs(regs, tsk)) { + __show_regs(regs); + printk("%s---- interrupt: %lx\n", loglvl, regs->trap); + } firstframe = 1; } @@ -2308,6 +2436,6 @@ void notrace __ppc64_runlatch_off(void) unsigned long arch_align_stack(unsigned long sp) { if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() & ~PAGE_MASK; + sp -= get_random_u32_below(PAGE_SIZE); return sp & ~0xf; } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index feae8509b59c..9ed9dde7d231 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -30,6 +30,7 @@ #include <linux/libfdt.h> #include <linux/cpu.h> #include <linux/pgtable.h> +#include <linux/seq_buf.h> #include <asm/rtas.h> #include <asm/page.h> @@ -44,7 +45,7 @@ #include <asm/iommu.h> #include <asm/btext.h> #include <asm/sections.h> -#include <asm/machdep.h> +#include <asm/setup.h> #include <asm/pci-bridge.h> #include <asm/kexec.h> #include <asm/opal.h> @@ -54,6 +55,8 @@ #include <asm/dt_cpu_ftrs.h> #include <asm/drmem.h> #include <asm/ultravisor.h> +#include <asm/prom.h> +#include <asm/plpks.h> #include <mm/mmu_decl.h> @@ -70,6 +73,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; u64 ppc64_rma_size; +unsigned int boot_cpu_node_count __ro_after_init; #endif static phys_addr_t first_memblock_size; static int __initdata boot_cpu_count; @@ -136,7 +140,7 @@ static void __init move_device_tree(void) } /* - * ibm,pa-features is a per-cpu property that contains a string of + * ibm,pa/pi-features is a per-cpu property that contains a string of * attribute descriptors, each of which has a 2 byte header plus up * to 254 bytes worth of processor attribute bits. First header * byte specifies the number of bytes following the header. @@ -147,16 +151,21 @@ static void __init move_device_tree(void) * pa-features property is missing, or a 1/0 to indicate if the feature * is supported/not supported. Note that the bit numbers are * big-endian to match the definition in PAPR. + * Note: the 'clear' flag clears the feature if the bit is set in the + * ibm,pa/pi-features property, it does not set the feature if the + * bit is clear. */ -static struct ibm_pa_feature { +struct ibm_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */ unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ - unsigned char pabyte; /* byte number in ibm,pa-features */ + unsigned char pabyte; /* byte number in ibm,pa/pi-features */ unsigned char pabit; /* bit number (big-endian) */ - unsigned char invert; /* if 1, pa bit set => clear feature */ -} ibm_pa_features[] __initdata = { + unsigned char clear; /* if 1, pa bit set => clear feature */ +}; + +static struct ibm_feature ibm_pa_features[] __initdata = { { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU }, { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU }, { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL }, @@ -176,11 +185,23 @@ static struct ibm_pa_feature { .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP }, { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 }, + { .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE }, +}; + +/* + * ibm,pi-features property provides the support of processor specific + * options not described in ibm,pa-features. 
Right now use byte 0, bit 3 + * which indicates the occurrence of DSI interrupt when the paste operation + * on the suspended NX window. + */ +static struct ibm_feature ibm_pi_features[] __initdata = { + { .pabyte = 0, .pabit = 3, .mmu_features = MMU_FTR_NX_DSI }, + { .pabyte = 0, .pabit = 4, .cpu_features = CPU_FTR_DBELL, .clear = 1 }, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, unsigned long tablelen, - struct ibm_pa_feature *fp, + struct ibm_feature *fp, unsigned long ft_size) { unsigned long i, len, bit; @@ -203,12 +224,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, if (fp->pabyte >= ftrs[0]) continue; bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1; - if (bit ^ fp->invert) { + if (bit && !fp->clear) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features |= fp->mmu_features; - } else { + } else if (bit == fp->clear) { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; @@ -217,17 +238,18 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, } } -static void __init check_cpu_pa_features(unsigned long node) +static void __init check_cpu_features(unsigned long node, char *name, + struct ibm_feature *fp, + unsigned long size) { const unsigned char *pa_ftrs; int tablelen; - pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen); + pa_ftrs = of_get_flat_dt_prop(node, name, &tablelen); if (pa_ftrs == NULL) return; - scan_features(node, pa_ftrs, tablelen, - ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); + scan_features(node, pa_ftrs, tablelen, fp, size); } #ifdef CONFIG_PPC_64S_HASH_MMU @@ -309,6 +331,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, void *data) { const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *cpu_version = NULL; const __be32 *prop; const __be32 *intserv; int i, nthreads; @@ -320,6 +343,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_node_count++; + /* Get physical cpuid */ intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len); if (!intserv) @@ -347,12 +373,30 @@ static int __init early_init_dt_scan_cpus(unsigned long node, if (found < 0) return 0; - DBG("boot cpu: logical %d physical %d\n", found, - be32_to_cpu(intserv[found_thread])); boot_cpuid = found; - // Pass the boot CPU's hard CPU id back to our caller - *((u32 *)data) = be32_to_cpu(intserv[found_thread]); + if (IS_ENABLED(CONFIG_PPC64)) + boot_cpu_hwid = be32_to_cpu(intserv[found_thread]); + + if (nr_cpu_ids % nthreads != 0) { + set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads)); + pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n", + nr_cpu_ids); + } + + if (boot_cpuid >= nr_cpu_ids) { + // Remember boot core for smp_setup_cpu_maps() + boot_core_hwid = be32_to_cpu(intserv[0]); + + pr_warn("Boot CPU %d (core hwid %d) >= nr_cpu_ids, adjusted boot CPU to %d\n", + boot_cpuid, boot_core_hwid, found_thread); + + // Adjust boot CPU to appear on logical core 0 + boot_cpuid = found_thread; + } + + DBG("boot cpu: logical %d physical %d\n", boot_cpuid, + be32_to_cpu(intserv[found_thread])); /* * PAPR defines "logical" PVR values for cpus that @@ -375,14 +419,25 @@ 
static int __init early_init_dt_scan_cpus(unsigned long node, */ if (!dt_cpu_ftrs_in_use()) { prop = of_get_flat_dt_prop(node, "cpu-version", NULL); - if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) + if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) { identify_cpu(0, be32_to_cpup(prop)); + cpu_version = prop; + } check_cpu_feature_properties(node); - check_cpu_pa_features(node); + check_cpu_features(node, "ibm,pa-features", ibm_pa_features, + ARRAY_SIZE(ibm_pa_features)); + check_cpu_features(node, "ibm,pi-features", ibm_pi_features, + ARRAY_SIZE(ibm_pi_features)); } identical_pvr_fixup(node); + + // We can now add the CPU name & PVR to the hardware description + seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); + if (cpu_version) + seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(cpu_version)); + init_mmu_slb_size(node); #ifdef CONFIG_PPC64 @@ -427,7 +482,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, tce_alloc_end = *lprop; #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_RESERVE lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL); if (lprop) crashk_res.start = *lprop; @@ -695,6 +750,23 @@ static void __init tm_init(void) static void tm_init(void) { } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +static int __init +early_init_dt_scan_model(unsigned long node, const char *uname, + int depth, void *data) +{ + const char *prop; + + if (depth != 0) + return 0; + + prop = of_get_flat_dt_prop(node, "model", NULL); + if (prop) + seq_buf_printf(&ppc_hw_desc, "%s ", prop); + + /* break now */ + return 1; +} + #ifdef CONFIG_PPC64 static void __init save_fscr_to_task(void) { @@ -714,15 +786,16 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - u32 boot_cpu_hwid; - phys_addr_t limit; + phys_addr_t int_vector_size; DBG(" -> early_init_devtree(%px)\n", params); /* Too early to BUG_ON(), do it by hand */ - if (!early_init_dt_verify(params)) + if (!early_init_dt_verify(params, __pa(params))) panic("BUG: Failed verifying flat device tree, bad version?"); + of_scan_flat_dt(early_init_dt_scan_model, NULL); + #ifdef CONFIG_PPC_RTAS /* Some machines might need RTAS info for debugging, grab it now. */ of_scan_flat_dt(early_init_dt_scan_rtas, NULL); @@ -747,10 +820,20 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + /* Append additional parameters passed for fadump capture kernel */ + fadump_append_bootargs(); + /* Scan memory nodes and rebuild MEMBLOCKs */ early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); + /* + * As generic code authors expect to be able to use static keys + * in early_param() handlers, we initialize the static keys just + * before parsing early params (it's fine to call jump_label_init() + * more than once). + */ + jump_label_init(); parse_early_param(); /* make sure we've parsed cmdline for mem= before this */ @@ -759,9 +842,16 @@ void __init early_init_devtree(void *params) setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); +#ifdef CONFIG_PPC64 + /* If relocatable, reserve at least 32k for interrupt vectors etc. */ + int_vector_size = __end_interrupts - _stext; + int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size); +#else /* If relocatable, reserve first 32k for interrupt vectors etc. 
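The memory_limit handling a few lines below switches from rounding the limit up to rounding it down to 16 MB, the hash-MMU large page size, so the enforced limit can never exceed what the user asked for. A stand-alone sketch contrasting the two roundings on a hypothetical mem= value:

#include <stdio.h>

#define SZ_16M 0x1000000UL
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))

int main(void)
{
	unsigned long limit = 0x7f00000UL;	/* hypothetical mem=, ~127 MB */

	printf("up: %#lx  down: %#lx\n",
	       ALIGN_UP(limit, SZ_16M), ALIGN_DOWN(limit, SZ_16M));
	return 0;
}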
*/ + int_vector_size = SZ_32K; +#endif if (PHYSICAL_START > MEMORY_START) - memblock_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, int_vector_size); reserve_kdump_trampoline(); #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* @@ -770,12 +860,15 @@ void __init early_init_devtree(void *params) */ if (fadump_reserve_mem() == 0) #endif - reserve_crashkernel(); + arch_reserve_crashkernel(); early_reserve_mem(); - /* Ensure that total memory size is page-aligned. */ - limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); - memblock_enforce_memory_limit(limit); + if (memory_limit > memblock_phys_mem_size()) + memory_limit = 0; + + /* Align down to 16 MB which is large page size with hash page translation */ + memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); + memblock_enforce_memory_limit(memory_limit); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) if (!early_radix_enabled()) @@ -798,7 +891,7 @@ void __init early_init_devtree(void *params) /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ - of_scan_flat_dt(early_init_dt_scan_cpus, &boot_cpu_hwid); + of_scan_flat_dt(early_init_dt_scan_cpus, NULL); if (boot_cpuid < 0) { printk("Failed to identify boot CPU !\n"); BUG(); @@ -815,10 +908,8 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); - // NB. paca is not installed until later in early_setup() - allocate_paca_ptrs(); - allocate_paca(boot_cpuid); - set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + /* Setup param area for passing additional parameters to fadump capture kernel. */ + fadump_setup_param_area(); #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ @@ -840,6 +931,9 @@ void __init early_init_devtree(void *params) powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; #endif + /* If kexec left a PLPKS password in the DT, get it and clear it */ + plpks_early_init_devtree(); + tm_init(); DBG(" <- early_init_devtree()\n"); diff --git a/arch/powerpc/kernel/prom_entry_64.S b/arch/powerpc/kernel/prom_entry_64.S new file mode 100644 index 000000000000..f1b8793d28c6 --- /dev/null +++ b/arch/powerpc/kernel/prom_entry_64.S @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains the 64-bit prom entry code. + */ +#include <asm/asm-offsets.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif +#include <asm/ppc_asm.h> + +.section ".text","ax",@progbits + +_GLOBAL(enter_prom) + mflr r0 + std r0,16(r1) + stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */ + + /* Because PROM is running in 32b mode, it clobbers the high order half + * of all registers that it saves. We therefore save those registers + * PROM might touch to the stack. 
(r0, r3-r13 are caller saved) + */ + SAVE_GPR(2, r1) + SAVE_GPR(13, r1) + SAVE_NVGPRS(r1) + mfcr r10 + mfmsr r11 + std r10,_CCR(r1) + std r11,_MSR(r1) + + /* Put PROM address in SRR0 */ + mtsrr0 r4 + + /* Setup our trampoline return addr in LR */ + bcl 20,31,$+4 +0: mflr r4 + addi r4,r4,(1f - 0b) + mtlr r4 + + /* Prepare a 32-bit mode big endian MSR + */ +#ifdef CONFIG_PPC_BOOK3E_64 + rlwinm r11,r11,0,1,31 + mtsrr1 r11 + rfi +#else /* CONFIG_PPC_BOOK3E_64 */ + LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) + andc r11,r11,r12 + mtsrr1 r11 + RFI_TO_KERNEL +#endif /* CONFIG_PPC_BOOK3E_64 */ + +1: /* Return from OF */ + FIXUP_ENDIAN + + /* Just make sure that r1 top 32 bits didn't get + * corrupt by OF + */ + rldicl r1,r1,0,32 + + /* Restore the MSR (back to 64 bits) */ + ld r0,_MSR(r1) + MTMSRD(r0) + isync + + /* Restore other registers */ + REST_GPR(2, r1) + REST_GPR(13, r1) + REST_NVGPRS(r1) + ld r4,_CCR(r1) + mtcr r4 + + addi r1,r1,SWITCH_FRAME_SIZE + ld r0,16(r1) + mtlr r0 + blr diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 13d6cb188835..827c958677f8 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -42,7 +42,7 @@ #include <asm/iommu.h> #include <asm/btext.h> #include <asm/sections.h> -#include <asm/machdep.h> +#include <asm/setup.h> #include <asm/asm-prototypes.h> #include <asm/ultravisor-api.h> @@ -96,12 +96,6 @@ static int of_workarounds __prombss; #define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */ #define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */ -#define PROM_BUG() do { \ - prom_printf("kernel BUG at %s line 0x%x!\n", \ - __FILE__, __LINE__); \ - __builtin_trap(); \ -} while (0) - #ifdef DEBUG_PROM #define prom_debug(x...) prom_printf(x) #else @@ -823,8 +817,8 @@ static void __init early_cmdline_parse(void) opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); #ifdef CONFIG_PPC64 - /* Align to 16 MB == size of ppc64 large page */ - prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); + /* Align down to 16 MB which is large page size with hash page translation */ + prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M); #endif } @@ -953,7 +947,7 @@ struct option_vector7 { } __packed; struct ibm_arch_vec { - struct { u32 mask, val; } pvrs[14]; + struct { __be32 mask, val; } pvrs[16]; u8 num_vectors; @@ -1014,6 +1008,14 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .val = cpu_to_be32(0x00800000), }, { + .mask = cpu_to_be32(0xffff0000), /* POWER11 */ + .val = cpu_to_be32(0x00820000), + }, + { + .mask = cpu_to_be32(0xffffffff), /* P11 compliant */ + .val = cpu_to_be32(0x0f000007), + }, + { .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */ .val = cpu_to_be32(0x0f000006), }, @@ -1059,7 +1061,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .virt_base = cpu_to_be32(0xffffffff), .virt_size = cpu_to_be32(0xffffffff), .load_base = cpu_to_be32(0xffffffff), - .min_rma = cpu_to_be32(512), /* 512MB min RMA */ + .min_rma = cpu_to_be32(MIN_RMA), .min_load = cpu_to_be32(0xffffffff), /* full client load */ .min_rma_percent = 0, /* min RMA percentage of total RAM */ .max_pft_size = 48, /* max log_2(hash table size) */ @@ -2790,91 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_MAPLE -/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property. - * The values are bad, and it doesn't even have the right number of cells. 
*/ -static void __init fixup_device_tree_maple(void) -{ - phandle isa; - u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ - u32 isa_ranges[6]; - char *name; - - name = "/ht@0/isa@4"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(isa)) { - name = "/ht@0/isa@6"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - rloc = 0x01003000; /* IO space; PCI device = 6 */ - } - if (!PHANDLE_VALID(isa)) - return; - - if (prom_getproplen(isa, "ranges") != 12) - return; - if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) - == PROM_ERROR) - return; - - if (isa_ranges[0] != 0x1 || - isa_ranges[1] != 0xf4000000 || - isa_ranges[2] != 0x00010000) - return; - - prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); - - isa_ranges[0] = 0x1; - isa_ranges[1] = 0x0; - isa_ranges[2] = rloc; - isa_ranges[3] = 0x0; - isa_ranges[4] = 0x0; - isa_ranges[5] = 0x00010000; - prom_setprop(isa, name, "ranges", - isa_ranges, sizeof(isa_ranges)); -} - -#define CPC925_MC_START 0xf8000000 -#define CPC925_MC_LENGTH 0x1000000 -/* The values for memory-controller don't have right number of cells */ -static void __init fixup_device_tree_maple_memory_controller(void) -{ - phandle mc; - u32 mc_reg[4]; - char *name = "/hostbridge@f8000000"; - u32 ac, sc; - - mc = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(mc)) - return; - - if (prom_getproplen(mc, "reg") != 8) - return; - - prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac)); - prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc)); - if ((ac != 2) || (sc != 2)) - return; - - if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR) - return; - - if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH) - return; - - prom_printf("Fixing up bogus hostbridge on Maple...\n"); - - mc_reg[0] = 0x0; - mc_reg[1] = CPC925_MC_START; - mc_reg[2] = 0x0; - mc_reg[3] = CPC925_MC_LENGTH; - prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg)); -} -#else -#define fixup_device_tree_maple() -#define fixup_device_tree_maple_memory_controller() -#endif - -#ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node * Pegasos needs decimal IRQ 14/15, not hexadecimal @@ -2925,12 +2842,8 @@ static void __init fixup_device_tree_chrp(void) } } } -#else -#define fixup_device_tree_chrp() -#endif -#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) -static void __init fixup_device_tree_pmac(void) +static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; u32 u3_rev; @@ -2969,11 +2882,27 @@ static void __init fixup_device_tree_pmac(void) prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent", &parent, sizeof(parent)); } -#else -#define fixup_device_tree_pmac() -#endif -#ifdef CONFIG_PPC_EFIKA +static void __init fixup_device_tree_pmac(void) +{ + __be32 val = 1; + char type[8]; + phandle node; + + // Some pmacs are missing #size-cells on escc or i2s nodes + for (node = 0; prom_next_node(&node); ) { + type[0] = '\0'; + prom_getprop(node, "device_type", type, sizeof(type)); + if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s")) + continue; + + if (prom_getproplen(node, "#size-cells") != PROM_ERROR) + continue; + + prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); + } +} + /* * The MPC5200 FEC driver requires an phy-handle property to tell it how * to talk to the phy. 
If the phy-handle property is missing, then this @@ -3105,11 +3034,7 @@ static void __init fixup_device_tree_efika(void) /* Make sure ethernet phy-handle property exists */ fixup_device_tree_efika_add_phy(); } -#else -#define fixup_device_tree_efika() -#endif -#ifdef CONFIG_PPC_PASEMI_NEMO /* * CFE supplied on Nemo is broken in several ways, biggest * problem is that it reassigns ISA interrupts to unused mpic ints. @@ -3185,18 +3110,23 @@ static void __init fixup_device_tree_pasemi(void) prom_setprop(iob, name, "device_type", "isa", sizeof("isa")); } -#else /* !CONFIG_PPC_PASEMI_NEMO */ -static inline void fixup_device_tree_pasemi(void) { } -#endif static void __init fixup_device_tree(void) { - fixup_device_tree_maple(); - fixup_device_tree_maple_memory_controller(); - fixup_device_tree_chrp(); - fixup_device_tree_pmac(); - fixup_device_tree_efika(); - fixup_device_tree_pasemi(); + if (IS_ENABLED(CONFIG_PPC_CHRP)) + fixup_device_tree_chrp(); + + if (IS_ENABLED(CONFIG_PPC_PMAC)) + fixup_device_tree_pmac(); + + if (IS_ENABLED(CONFIG_PPC_PMAC) && IS_ENABLED(CONFIG_PPC64)) + fixup_device_tree_pmac64(); + + if (IS_ENABLED(CONFIG_PPC_EFIKA)) + fixup_device_tree_efika(); + + if (IS_ENABLED(CONFIG_PPC_PASEMI_NEMO)) + fixup_device_tree_pasemi(); } static void __init prom_find_boot_cpu(void) diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index dfa5f729f774..3090b97258ae 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -13,8 +13,13 @@ # If you really need to reference something from prom_init.o add # it to the list below: -grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null -if [ $? -eq 0 ] +has_renamed_memintrinsics() +{ + grep -q "^CONFIG_KASAN=y$" "${KCONFIG_CONFIG}" && \ + ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" "${KCONFIG_CONFIG}" +} + +if has_renamed_memintrinsics then MEM_FUNCS="__memcpy __memset" else @@ -26,8 +31,7 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start logo_linux_clut224 btext_prepare_BAT reloc_got2 kernstart_addr memstart_addr linux_banner _stext -__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC. -relocate" +btext_setup_display TOC. relocate" NM="$1" OBJ="$2" @@ -38,25 +42,24 @@ check_section() { file=$1 section=$2 - size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") + size=$(objdump -h -j "$section" "$file" 2>/dev/null | awk "\$2 == \"$section\" {print \$3}") size=${size:-0} - if [ $size -ne 0 ]; then + if [ "$size" -ne 0 ]; then ERROR=1 echo "Error: Section $section not empty in prom_init.c" >&2 fi } -for UNDEF in $($NM -u $OBJ | awk '{print $2}') +for UNDEF in $($NM -u "$OBJ" | awk '{print $2}') do # On 64-bit nm gives us the function descriptors, which have # a leading . on the name, so strip it off here. 
UNDEF="${UNDEF#.}" - if [ $KBUILD_VERBOSE ]; then - if [ $KBUILD_VERBOSE -ne 0 ]; then - echo "Checking prom_init.o symbol '$UNDEF'" - fi - fi + case "$KBUILD_VERBOSE" in + *1*) + echo "Checking prom_init.o symbol '$UNDEF'" ;; + esac OK=0 for WHITE in $WHITELIST @@ -84,8 +87,8 @@ do fi done -check_section $OBJ .data -check_section $OBJ .bss -check_section $OBJ .init.data +check_section "$OBJ" .data +check_section "$OBJ" .bss +check_section "$OBJ" .init.data exit $ERROR diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h index eafe5f0f6289..4171a5727197 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-decl.h +++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +#include <linux/regset.h> + /* * Set of msr bits that gdb can change on behalf of a process. */ @@ -55,6 +57,10 @@ enum powerpc_regset { REGSET_TAR, /* TAR register */ REGSET_EBB, /* EBB registers */ REGSET_PMR, /* Performance Monitor Registers */ + REGSET_DEXCR, /* DEXCR registers */ +#ifdef CONFIG_CHECKPOINT_RESTORE + REGSET_HASHKEYR, /* HASHKEYR register */ +#endif #endif #ifdef CONFIG_PPC_MEM_KEYS REGSET_PKEY, /* AMR register */ diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 44045363a903..447bff87fd21 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -12,7 +12,7 @@ void flush_tmregs_to_thread(struct task_struct *tsk) { /* * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). + * its thread_struct during __switch_to(). * * A reclaim flushes ALL the state or if not in TM save TM SPRs * in the appropriate thread structures from live. @@ -170,9 +170,9 @@ int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, (PT_MAX_PUT_REG + 1) * sizeof(reg)); if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); + user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); if (!ret && count > 0) { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, @@ -183,8 +183,8 @@ int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset, } if (!ret) - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); + user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); return ret; } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 076d867412c7..0310f9097e39 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -267,9 +267,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, (PT_MAX_PUT_REG + 1) * sizeof(reg)); if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret) - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_MAX_PUT_REG + 1) * sizeof(reg), - PT_TRAP * sizeof(reg)); + user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_MAX_PUT_REG + 1) * sizeof(reg), + PT_TRAP * sizeof(reg)); if (!ret && count > 0) { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ®, @@ -280,8 +280,8 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, } if (!ret) - ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); + 
user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); return ret; } @@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, static int ppr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + if (!target->thread.regs) + return -EINVAL; + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); } @@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + if (!target->thread.regs) + return -EINVAL; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.regs->ppr, 0, sizeof(u64)); } @@ -448,7 +454,60 @@ static int pmu_set(struct task_struct *target, const struct user_regset *regset, 5 * sizeof(unsigned long)); return ret; } -#endif + +static int dexcr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return regset->n; +} + +static int dexcr_get(struct task_struct *target, const struct user_regset *regset, + struct membuf to) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + membuf_store(&to, (u64)lower_32_bits(target->thread.dexcr)); + + /* + * Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably + * change it between CPUs of the same guest. + */ + return membuf_store(&to, (u64)lower_32_bits(mfspr(SPRN_HDEXCR_RO))); +} + +#ifdef CONFIG_CHECKPOINT_RESTORE +static int hashkeyr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return regset->n; +} + +static int hashkeyr_get(struct task_struct *target, const struct user_regset *regset, + struct membuf to) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return membuf_store(&to, target->thread.hashkeyr); +} + +static int hashkeyr_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return -ENODEV; + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.hashkeyr, + 0, sizeof(unsigned long)); +} +#endif /* CONFIG_CHECKPOINT_RESTORE */ +#endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_MEM_KEYS static int pkey_active(struct task_struct *target, const struct user_regset *regset) @@ -509,110 +568,122 @@ static int pkey_set(struct task_struct *target, const struct user_regset *regset static const struct user_regset native_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .regset_get = gpr_get, .set = gpr_set }, [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, + USER_REGSET_NOTE_TYPE(PPC_VMX), .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_VSX [REGSET_VSX] = { - .core_note_type = NT_PPC_VSX, .n = 32, + USER_REGSET_NOTE_TYPE(PPC_VSX), .n = 32, .size = sizeof(double), .align = sizeof(double), .active = vsr_active, .regset_get = vsr_get, .set = vsr_set }, #endif 
#ifdef CONFIG_SPE [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, + USER_REGSET_NOTE_TYPE(PPC_SPE), .n = 35, .size = sizeof(u32), .align = sizeof(u32), .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CGPR), .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .active = tm_cgpr_active, .regset_get = tm_cgpr_get, .set = tm_cgpr_set }, [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CFPR), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVMX), .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVSX), .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + USER_REGSET_NOTE_TYPE(PPC_TM_SPR), .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CTAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CPPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CDSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_PPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_DSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + USER_REGSET_NOTE_TYPE(PPC_EBB), .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), .active = ebb_active, .regset_get = ebb_get, .set = ebb_set }, [REGSET_PMR] = { - .core_note_type = NT_PPC_PMU, .n = ELF_NPMU, + USER_REGSET_NOTE_TYPE(PPC_PMU), .n = ELF_NPMU, .size = sizeof(u64), .align = sizeof(u64), .active = pmu_active, .regset_get = pmu_get, .set = pmu_set }, + [REGSET_DEXCR] = { + USER_REGSET_NOTE_TYPE(PPC_DEXCR), .n = ELF_NDEXCR, + .size = sizeof(u64), .align = sizeof(u64), + .active = dexcr_active, .regset_get = dexcr_get + }, +#ifdef CONFIG_CHECKPOINT_RESTORE + [REGSET_HASHKEYR] = { + USER_REGSET_NOTE_TYPE(PPC_HASHKEYR), .n = ELF_NHASHKEYR, + 
.size = sizeof(u64), .align = sizeof(u64), + .active = hashkeyr_active, .regset_get = hashkeyr_get, .set = hashkeyr_set + }, +#endif #endif #ifdef CONFIG_PPC_MEM_KEYS [REGSET_PKEY] = { - .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + USER_REGSET_NOTE_TYPE(PPC_PKEY), .n = ELF_NPKEY, .size = sizeof(u64), .align = sizeof(u64), .active = pkey_active, .regset_get = pkey_get, .set = pkey_set }, @@ -640,80 +711,110 @@ int gpr32_get_common(struct task_struct *target, return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32)); } -int gpr32_set_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf, - unsigned long *regs) +static int gpr32_set_common_kernel(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, unsigned long *regs) { const compat_ulong_t *k = kbuf; + + pos /= sizeof(compat_ulong_t); + count /= sizeof(compat_ulong_t); + + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + + if (count > 0 && pos == PT_MSR) { + set_user_msr(target, *k++); + ++pos; + --count; + } + + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + + if (count > 0 && pos == PT_TRAP) { + set_user_trap(target, *k++); + ++pos; + --count; + } + + kbuf = k; + pos *= sizeof(compat_ulong_t); + count *= sizeof(compat_ulong_t); + user_regset_copyin_ignore(&pos, &count, &kbuf, NULL, + (PT_TRAP + 1) * sizeof(compat_ulong_t), -1); + return 0; +} + +static int gpr32_set_common_user(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void __user *ubuf, unsigned long *regs) +{ const compat_ulong_t __user *u = ubuf; + const void *kbuf = NULL; compat_ulong_t reg; - if (!kbuf && !user_read_access_begin(u, count)) + if (!user_read_access_begin(u, count)) return -EFAULT; pos /= sizeof(reg); count /= sizeof(reg); - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - regs[pos++] = *k++; - else - for (; count > 0 && pos < PT_MSR; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - + for (; count > 0 && pos < PT_MSR; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; + } if (count > 0 && pos == PT_MSR) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_msr(target, reg); ++pos; --count; } - if (kbuf) { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) - regs[pos++] = *k++; - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - ++k; - } else { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - unsafe_get_user(reg, u++, Efault); + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + unsafe_get_user(reg, u++, Efault); if (count > 0 && pos == PT_TRAP) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_trap(target, reg); ++pos; --count; } - if (!kbuf) - user_read_access_end(); + user_read_access_end(); - kbuf = k; ubuf = u; pos *= sizeof(reg); count *= sizeof(reg); - return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, - (PT_TRAP + 1) * sizeof(reg), -1); + user_regset_copyin_ignore(&pos, 
&count, &kbuf, &ubuf, + (PT_TRAP + 1) * sizeof(reg), -1); + return 0; Efault: user_read_access_end(); return -EFAULT; } +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs) +{ + if (kbuf) + return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs); + else + return gpr32_set_common_user(target, regset, pos, count, ubuf, regs); +} + static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) @@ -742,92 +843,92 @@ static int gpr32_set(struct task_struct *target, */ static const struct user_regset compat_regsets[] = { [REGSET_GPR] = { - .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG, .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), .regset_get = gpr32_get, .set = gpr32_set }, [REGSET_FPR] = { - .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .regset_get = fpr_get, .set = fpr_set }, #ifdef CONFIG_ALTIVEC [REGSET_VMX] = { - .core_note_type = NT_PPC_VMX, .n = 34, + USER_REGSET_NOTE_TYPE(PPC_VMX), .n = 34, .size = sizeof(vector128), .align = sizeof(vector128), .active = vr_active, .regset_get = vr_get, .set = vr_set }, #endif #ifdef CONFIG_SPE [REGSET_SPE] = { - .core_note_type = NT_PPC_SPE, .n = 35, + USER_REGSET_NOTE_TYPE(PPC_SPE), .n = 35, .size = sizeof(u32), .align = sizeof(u32), .active = evr_active, .regset_get = evr_get, .set = evr_set }, #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM [REGSET_TM_CGPR] = { - .core_note_type = NT_PPC_TM_CGPR, .n = ELF_NGREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CGPR), .n = ELF_NGREG, .size = sizeof(long), .align = sizeof(long), .active = tm_cgpr_active, .regset_get = tm_cgpr32_get, .set = tm_cgpr32_set }, [REGSET_TM_CFPR] = { - .core_note_type = NT_PPC_TM_CFPR, .n = ELF_NFPREG, + USER_REGSET_NOTE_TYPE(PPC_TM_CFPR), .n = ELF_NFPREG, .size = sizeof(double), .align = sizeof(double), .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set }, [REGSET_TM_CVMX] = { - .core_note_type = NT_PPC_TM_CVMX, .n = ELF_NVMX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVMX), .n = ELF_NVMX, .size = sizeof(vector128), .align = sizeof(vector128), .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set }, [REGSET_TM_CVSX] = { - .core_note_type = NT_PPC_TM_CVSX, .n = ELF_NVSX, + USER_REGSET_NOTE_TYPE(PPC_TM_CVSX), .n = ELF_NVSX, .size = sizeof(double), .align = sizeof(double), .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set }, [REGSET_TM_SPR] = { - .core_note_type = NT_PPC_TM_SPR, .n = ELF_NTMSPRREG, + USER_REGSET_NOTE_TYPE(PPC_TM_SPR), .n = ELF_NTMSPRREG, .size = sizeof(u64), .align = sizeof(u64), .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set }, [REGSET_TM_CTAR] = { - .core_note_type = NT_PPC_TM_CTAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CTAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set }, [REGSET_TM_CPPR] = { - .core_note_type = NT_PPC_TM_CPPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CPPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set }, [REGSET_TM_CDSCR] = { - .core_note_type = NT_PPC_TM_CDSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TM_CDSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = tm_dscr_active, 
.regset_get = tm_dscr_get, .set = tm_dscr_set }, #endif #ifdef CONFIG_PPC64 [REGSET_PPR] = { - .core_note_type = NT_PPC_PPR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_PPR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = ppr_get, .set = ppr_set }, [REGSET_DSCR] = { - .core_note_type = NT_PPC_DSCR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_DSCR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = dscr_get, .set = dscr_set }, #endif #ifdef CONFIG_PPC_BOOK3S_64 [REGSET_TAR] = { - .core_note_type = NT_PPC_TAR, .n = 1, + USER_REGSET_NOTE_TYPE(PPC_TAR), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .regset_get = tar_get, .set = tar_set }, [REGSET_EBB] = { - .core_note_type = NT_PPC_EBB, .n = ELF_NEBB, + USER_REGSET_NOTE_TYPE(PPC_EBB), .n = ELF_NEBB, .size = sizeof(u64), .align = sizeof(u64), .active = ebb_active, .regset_get = ebb_get, .set = ebb_set }, diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c index 1da4303128ef..7df08004c47d 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-vsx.c +++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c @@ -71,7 +71,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset, } /* - * Currently to set and and get all the vsx state, you need to call + * Currently to set and get all the vsx state, you need to call * the fp and VMX calls as well. This only get/sets the lower 32 * 128bit VSX registers. */ diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 5d7a72b41ae7..c6997df63287 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * PowerPC version * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) @@ -9,10 +10,6 @@ * * Modified by Cort Dougan (cort@hq.fsmlabs.com) * and Paul Mackerras (paulus@samba.org). - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/regset.h> @@ -218,7 +215,7 @@ static int do_seccomp(struct pt_regs *regs) * have already loaded -ENOSYS into r3, or seccomp has put * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). */ - if (__secure_computing(NULL)) + if (__secure_computing()) return -1; /* diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 232e4549defe..efd52f2e7033 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -27,8 +27,8 @@ _GLOBAL(relocate) add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */ ld r10,(p_st - 0b)(r12) add r10,r10,r12 /* r10 has runtime addr of _stext */ - ld r13,(p_sym - 0b)(r12) - add r13,r13,r12 /* r13 has runtime addr of .dynsym */ + ld r4,(p_sym - 0b)(r12) + add r4,r4,r12 /* r4 has runtime addr of .dynsym */ /* * Scan the dynamic section for the RELA, RELASZ and RELAENT entries. 
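These relocate() hunks (continued below) move the .dynsym pointer and symbol-index scratch values out of r13/r14 and into r4/r5, presumably to keep the routine away from r13, which ppc64 kernels otherwise reserve for the PACA pointer. The loop itself is a conventional ELF RELA walk. A rough C rendering of the logic, for orientation only — the names apply_rela, nr, delta and dynsym are illustrative, and the real code necessarily stays in assembly because it runs before the kernel has been relocated:

#include <elf.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void apply_rela(const Elf64_Rela *rela, size_t nr,
		       uintptr_t delta, const Elf64_Sym *dynsym)
{
	for (size_t i = 0; i < nr; i++) {
		uint64_t *where = (uint64_t *)(rela[i].r_offset + delta);
		uint64_t val;

		switch (ELF64_R_TYPE(rela[i].r_info)) {
		case R_PPC64_RELATIVE:
			/* B + A: load address plus addend */
			*where = delta + rela[i].r_addend;
			break;
		case R_PPC64_UADDR64:
			/* S + A + B; sizeof(Elf64_Sym) == 24, as the asm notes */
			val = delta + rela[i].r_addend +
			      dynsym[ELF64_R_SYM(rela[i].r_info)].st_value;
			memcpy(where, &val, sizeof(val)); /* target may be unaligned */
			break;
		default:
			break;	/* other relocation types are not handled here */
		}
	}
}

R_PPC64_UADDR64 exists precisely because its target may not be 8-byte aligned, hence the byte-wise store in the sketch.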
@@ -84,16 +84,16 @@ _GLOBAL(relocate) ld r0,16(r9) /* reloc->r_addend */ b .Lstore .Luaddr64: - srdi r14,r0,32 /* ELF64_R_SYM(reloc->r_info) */ + srdi r5,r0,32 /* ELF64_R_SYM(reloc->r_info) */ clrldi r0,r0,32 cmpdi r0,R_PPC64_UADDR64 bne .Lnext ld r6,0(r9) ld r0,16(r9) - mulli r14,r14,24 /* 24 == sizeof(elf64_sym) */ - add r14,r14,r13 /* elf64_sym[ELF64_R_SYM] */ - ld r14,8(r14) - add r0,r0,r14 + mulli r5,r5,24 /* 24 == sizeof(elf64_sym) */ + add r5,r5,r4 /* elf64_sym[ELF64_R_SYM] */ + ld r5,8(r5) + add r0,r0,r5 .Lstore: add r0,r0,r3 stdx r0,r7,r6 diff --git a/arch/powerpc/kernel/rethook.c b/arch/powerpc/kernel/rethook.c new file mode 100644 index 000000000000..5f5f47ae82cf --- /dev/null +++ b/arch/powerpc/kernel/rethook.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PowerPC implementation of rethook. This depends on kprobes. + */ + +#include <linux/kprobes.h> +#include <linux/rethook.h> + +/* + * Function return trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +asm(".global arch_rethook_trampoline\n" + ".type arch_rethook_trampoline, @function\n" + "arch_rethook_trampoline:\n" + "nop\n" + "blr\n" + ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n"); + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int trampoline_rethook_handler(struct kprobe *p, struct pt_regs *regs) +{ + return !rethook_trampoline_handler(regs, regs->gpr[1]); +} +NOKPROBE_SYMBOL(trampoline_rethook_handler); + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + rh->ret_addr = regs->link; + rh->frame = regs->gpr[1]; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +/* This is called from rethook_trampoline_handler(). */ +void arch_rethook_fixup_return(struct pt_regs *regs, unsigned long orig_ret_address) +{ + /* + * We get here through one of two paths: + * 1. by taking a trap -> kprobe_handler() -> here + * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here + * + * When going back through (1), we need regs->nip to be setup properly + * as it is used to determine the return address from the trap. + * For (2), since nip is not honoured with optprobes, we instead setup + * the link register properly so that the subsequent 'blr' in + * arch_rethook_trampoline jumps back to the right instruction. + * + * For nip, we should set the address to the previous instruction since + * we end up emulating it in kprobe_handler(), which increments the nip + * again. 
+ */ + regs_set_return_ip(regs, orig_ret_address - 4); + regs->link = orig_ret_address; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &arch_rethook_trampoline, + .pre_handler = trampoline_rethook_handler +}; + +/* rethook initializer */ +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index 081b2b741a8c..f38df72e64b8 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -287,9 +287,9 @@ static ssize_t ppc_rtas_poweron_write(struct file *file, rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_FOR_POWER_ON), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */); if (error) printk(KERN_WARNING "error: setting poweron time returned: %s\n", ppc_rtas_process_error(error)); @@ -350,9 +350,9 @@ static ssize_t ppc_rtas_clock_write(struct file *file, return error; rtc_time64_to_tm(nowtime, &tm); - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, - tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec, 0); + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, 0); if (error) printk(KERN_WARNING "error: setting the clock returned: %s\n", ppc_rtas_process_error(error)); @@ -362,7 +362,7 @@ static ssize_t ppc_rtas_clock_write(struct file *file, static int ppc_rtas_clock_show(struct seq_file *m, void *v) { int ret[8]; - int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + int error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); if (error) { printk(KERN_WARNING "error: reading the clock returned: %s\n", @@ -385,7 +385,7 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v) { int i,j; int state, error; - int get_sensor_state = rtas_token("get-sensor-state"); + int get_sensor_state = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n"); seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n"); @@ -708,8 +708,8 @@ static ssize_t ppc_rtas_tone_freq_write(struct file *file, return error; rtas_tone_frequency = freq; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_FREQUENCY, 0, freq); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_FREQUENCY, 0, freq); if (error) printk(KERN_WARNING "error: setting tone frequency returned: %s\n", ppc_rtas_process_error(error)); @@ -736,8 +736,8 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file, volume = 100; rtas_tone_volume = volume; /* save it for later */ - error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL, - TONE_VOLUME, 0, volume); + error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL, + TONE_VOLUME, 0, volume); if (error) printk(KERN_WARNING "error: setting tone volume returned: %s\n", ppc_rtas_process_error(error)); @@ -752,6 +752,8 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v) /** * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space. + * @m: seq_file output target. 
+ * @v: Unused. * * Base + size description of a range of RTAS-addressable memory set * aside for user space to use as work area(s) for certain RTAS diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c index 5a31d1829bca..6996214532bd 100644 --- a/arch/powerpc/kernel/rtas-rtc.c +++ b/arch/powerpc/kernel/rtas-rtc.c @@ -21,7 +21,7 @@ time64_t __init rtas_get_boot_time(void) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -53,7 +53,7 @@ void rtas_get_rtc_time(struct rtc_time *rtc_tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret); + error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret); wait_time = rtas_busy_delay_time(error); if (wait_time) { @@ -90,7 +90,7 @@ int rtas_set_rtc_time(struct rtc_time *tm) max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT; do { - error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL, + error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL, tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, 0); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 693133972294..e61245c4468e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -7,77 +7,741 @@ * Copyright (C) 2001 IBM. */ -#include <linux/stdarg.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/export.h> -#include <linux/init.h> +#define pr_fmt(fmt) "rtas: " fmt + +#include <linux/bsearch.h> #include <linux/capability.h> #include <linux/delay.h> -#include <linux/cpu.h> -#include <linux/sched.h> -#include <linux/smp.h> -#include <linux/completion.h> -#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/kconfig.h> +#include <linux/kernel.h> +#include <linux/lockdep.h> #include <linux/memblock.h> -#include <linux/slab.h> -#include <linux/reboot.h> -#include <linux/syscalls.h> +#include <linux/mutex.h> +#include <linux/nospec.h> #include <linux/of.h> #include <linux/of_fdt.h> +#include <linux/reboot.h> +#include <linux/sched.h> +#include <linux/security.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stdarg.h> +#include <linux/syscalls.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/xarray.h> +#include <asm/delay.h> +#include <asm/firmware.h> #include <asm/interrupt.h> -#include <asm/rtas.h> -#include <asm/hvcall.h> #include <asm/machdep.h> -#include <asm/firmware.h> +#include <asm/mmu.h> #include <asm/page.h> -#include <asm/param.h> -#include <asm/delay.h> -#include <linux/uaccess.h> -#include <asm/udbg.h> -#include <asm/syscalls.h> -#include <asm/smp.h> -#include <linux/atomic.h> +#include <asm/rtas-work-area.h> +#include <asm/rtas.h> #include <asm/time.h> -#include <asm/mmu.h> -#include <asm/topology.h> -#include <asm/paca.h> +#include <asm/trace.h> +#include <asm/udbg.h> + +struct rtas_filter { + /* Indexes into the args buffer, -1 if not used */ + const int buf_idx1; + const int size_idx1; + const int buf_idx2; + const int size_idx2; + /* + * Assumed buffer size per the spec if the function does not + * have a size parameter, e.g. ibm,errinjct. 0 if unused. 
+ */ + const int fixed_size; +}; + +/** + * struct rtas_function - Descriptor for RTAS functions. + * + * @token: Value of @name if it exists under the /rtas node. + * @name: Function name. + * @filter: If non-NULL, invoking this function via the rtas syscall is + * generally allowed, and @filter describes constraints on the + * arguments. See also @banned_for_syscall_on_le. + * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed + * but specifically restricted on ppc64le. Such + * functions are believed to have no users on + * ppc64le, and we want to keep it that way. It does + * not make sense for this to be set when @filter + * is NULL. + * @lock: Pointer to an optional dedicated per-function mutex. This + * should be set for functions that require multiple calls in + * sequence to complete a single operation, and such sequences + * will disrupt each other if allowed to interleave. Users of + * this function are required to hold the associated lock for + * the duration of the call sequence. Add an explanatory + * comment to the function table entry if setting this member. + */ +struct rtas_function { + s32 token; + const bool banned_for_syscall_on_le:1; + const char * const name; + const struct rtas_filter *filter; + struct mutex *lock; +}; + +/* + * Per-function locks for sequence-based RTAS functions. + */ +static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock); +static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock); +DEFINE_MUTEX(rtas_ibm_physical_attestation_lock); +DEFINE_MUTEX(rtas_ibm_get_vpd_lock); +DEFINE_MUTEX(rtas_ibm_get_indices_lock); +DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock); +DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock); + +static struct rtas_function rtas_function_table[] __ro_after_init = { + [RTAS_FNIDX__CHECK_EXCEPTION] = { + .name = "check-exception", + }, + [RTAS_FNIDX__DISPLAY_CHARACTER] = { + .name = "display-character", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__EVENT_SCAN] = { + .name = "event-scan", + }, + [RTAS_FNIDX__FREEZE_TIME_BASE] = { + .name = "freeze-time-base", + }, + [RTAS_FNIDX__GET_POWER_LEVEL] = { + .name = "get-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_SENSOR_STATE] = { + .name = "get-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__GET_TERM_CHAR] = { + .name = "get-term-char", + }, + [RTAS_FNIDX__GET_TIME_OF_DAY] = { + .name = "get-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = { + .name = "ibm,activate-firmware", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ as of v2.13 doesn't explicitly impose any + * restriction, but this typically requires multiple + * calls before success, and there's no reason to + * allow sequences to interleave. 
+ */ + .lock = &rtas_ibm_activate_firmware_lock, + }, + [RTAS_FNIDX__IBM_CBE_START_PTCAL] = { + .name = "ibm,cbe-start-ptcal", + }, + [RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = { + .name = "ibm,cbe-stop-ptcal", + }, + [RTAS_FNIDX__IBM_CHANGE_MSI] = { + .name = "ibm,change-msi", + }, + [RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = { + .name = "ibm,close-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = { + .name = "ibm,configure-bridge", + }, + [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = { + .name = "ibm,configure-connector", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = { + .name = "ibm,configure-kernel-dump", + }, + [RTAS_FNIDX__IBM_CONFIGURE_PE] = { + .name = "ibm,configure-pe", + }, + [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = { + .name = "ibm,create-pe-dma-window", + }, + [RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = { + .name = "ibm,display-message", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_ERRINJCT] = { + .name = "ibm,errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 1024, + }, + }, + [RTAS_FNIDX__IBM_EXTI2C] = { + .name = "ibm,exti2c", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = { + .name = "ibm,get-config-addr-info", + }, + [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = { + .name = "ibm,get-config-addr-info2", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = { + .name = "ibm,get-dynamic-sensor-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS + * must not call ibm,get-dynamic-sensor-state with + * different inputs until a non-retry status has been + * returned. + */ + .lock = &rtas_ibm_get_dynamic_sensor_state_lock, + }, + [RTAS_FNIDX__IBM_GET_INDICES] = { + .name = "ibm,get-indices", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not + * interleave ibm,get-indices call sequences with + * different inputs. + */ + .lock = &rtas_ibm_get_indices_lock, + }, + [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = { + .name = "ibm,get-rio-topology", + }, + [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = { + .name = "ibm,get-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = 2, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_GET_VPD] = { + .name = "ibm,get-vpd", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = 1, .size_idx2 = 2, + }, + /* + * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences + * should not be allowed to interleave. 
+ */ + .lock = &rtas_ibm_get_vpd_lock, + }, + [RTAS_FNIDX__IBM_GET_XIVE] = { + .name = "ibm,get-xive", + }, + [RTAS_FNIDX__IBM_INT_OFF] = { + .name = "ibm,int-off", + }, + [RTAS_FNIDX__IBM_INT_ON] = { + .name = "ibm,int-on", + }, + [RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = { + .name = "ibm,io-quiesce-ack", + }, + [RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = { + .name = "ibm,lpar-perftools", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = 3, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow + * only one call sequence in progress at a time. + */ + .lock = &rtas_ibm_lpar_perftools_lock, + }, + [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = { + .name = "ibm,manage-flash-image", + }, + [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = { + .name = "ibm,manage-storage-preservation", + }, + [RTAS_FNIDX__IBM_NMI_INTERLOCK] = { + .name = "ibm,nmi-interlock", + }, + [RTAS_FNIDX__IBM_NMI_REGISTER] = { + .name = "ibm,nmi-register", + }, + [RTAS_FNIDX__IBM_OPEN_ERRINJCT] = { + .name = "ibm,open-errinjct", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = { + .name = "ibm,open-sriov-allow-unfreeze", + }, + [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = { + .name = "ibm,open-sriov-map-pe-number", + }, + [RTAS_FNIDX__IBM_OS_TERM] = { + .name = "ibm,os-term", + }, + [RTAS_FNIDX__IBM_PARTNER_CONTROL] = { + .name = "ibm,partner-control", + }, + [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = { + .name = "ibm,physical-attestation", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * This follows a sequence-based pattern similar to + * ibm,get-vpd et al. Since PAPR+ restricts + * interleaving call sequences for other functions of + * this style, assume the restriction applies here, + * even though it's not explicit in the spec. + */ + .lock = &rtas_ibm_physical_attestation_lock, + }, + [RTAS_FNIDX__IBM_PLATFORM_DUMP] = { + .name = "ibm,platform-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 4, .size_idx1 = 5, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent + * sequences of ibm,platform-dump are allowed if they + * are operating on different dump tags. So leave the + * lock pointer unset for now. This may need + * reconsideration if kernel-internal users appear. + */ + }, + [RTAS_FNIDX__IBM_POWER_OFF_UPS] = { + .name = "ibm,power-off-ups", + }, + [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = { + .name = "ibm,query-interrupt-source-number", + }, + [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = { + .name = "ibm,query-pe-dma-window", + }, + [RTAS_FNIDX__IBM_READ_PCI_CONFIG] = { + .name = "ibm,read-pci-config", + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = { + .name = "ibm,read-slot-reset-state", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = { + .name = "ibm,read-slot-reset-state2", + }, + [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = { + .name = "ibm,remove-pe-dma-window", + }, + [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = { + /* + * Note: PAPR+ v2.13 7.3.31.4.1 spells this as + * "ibm,reset-pe-dma-windows" (plural), but RTAS + * implementations use the singular form in practice. 
+ */ + .name = "ibm,reset-pe-dma-window", + }, + [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = { + .name = "ibm,scan-log-dump", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = 1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = { + .name = "ibm,set-dynamic-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 2, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + /* + * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call + * this function with different inputs until a + * non-retry status has been returned. + */ + .lock = &rtas_ibm_set_dynamic_indicator_lock, + }, + [RTAS_FNIDX__IBM_SET_EEH_OPTION] = { + .name = "ibm,set-eeh-option", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_SLOT_RESET] = { + .name = "ibm,set-slot-reset", + }, + [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = { + .name = "ibm,set-system-parameter", + .filter = &(const struct rtas_filter) { + .buf_idx1 = 1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_SET_XIVE] = { + .name = "ibm,set-xive", + }, + [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = { + .name = "ibm,slot-error-detail", + }, + [RTAS_FNIDX__IBM_SUSPEND_ME] = { + .name = "ibm,suspend-me", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = { + .name = "ibm,tune-dma-parms", + }, + [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = { + .name = "ibm,update-flash-64-and-reboot", + }, + [RTAS_FNIDX__IBM_UPDATE_NODES] = { + .name = "ibm,update-nodes", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = { + .name = "ibm,update-properties", + .banned_for_syscall_on_le = true, + .filter = &(const struct rtas_filter) { + .buf_idx1 = 0, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + .fixed_size = 4096, + }, + }, + [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = { + .name = "ibm,validate-flash-image", + }, + [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = { + .name = "ibm,write-pci-config", + }, + [RTAS_FNIDX__NVRAM_FETCH] = { + .name = "nvram-fetch", + }, + [RTAS_FNIDX__NVRAM_STORE] = { + .name = "nvram-store", + }, + [RTAS_FNIDX__POWER_OFF] = { + .name = "power-off", + }, + [RTAS_FNIDX__PUT_TERM_CHAR] = { + .name = "put-term-char", + }, + [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = { + .name = "query-cpu-stopped-state", + }, + [RTAS_FNIDX__READ_PCI_CONFIG] = { + .name = "read-pci-config", + }, + [RTAS_FNIDX__RTAS_LAST_ERROR] = { + .name = "rtas-last-error", + }, + [RTAS_FNIDX__SET_INDICATOR] = { + .name = "set-indicator", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_POWER_LEVEL] = { + .name = "set-power-level", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = { + .name = "set-time-for-power-on", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + }, + [RTAS_FNIDX__SET_TIME_OF_DAY] = { + .name = "set-time-of-day", + .filter = &(const struct rtas_filter) { + .buf_idx1 = -1, .size_idx1 = -1, + .buf_idx2 = -1, .size_idx2 = -1, + }, + 
}, + [RTAS_FNIDX__START_CPU] = { + .name = "start-cpu", + }, + [RTAS_FNIDX__STOP_SELF] = { + .name = "stop-self", + }, + [RTAS_FNIDX__SYSTEM_REBOOT] = { + .name = "system-reboot", + }, + [RTAS_FNIDX__THAW_TIME_BASE] = { + .name = "thaw-time-base", + }, + [RTAS_FNIDX__WRITE_PCI_CONFIG] = { + .name = "write-pci-config", + }, +}; + +#define for_each_rtas_function(funcp) \ + for (funcp = &rtas_function_table[0]; \ + funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \ + ++funcp) + +/* + * Nearly all RTAS calls need to be serialized. All uses of the + * default rtas_args block must hold rtas_lock. + * + * Exceptions to the RTAS serialization requirement (e.g. stop-self) + * must use a separate rtas_args structure. + */ +static DEFINE_RAW_SPINLOCK(rtas_lock); +static struct rtas_args rtas_args; + +/** + * rtas_function_token() - RTAS function token lookup. + * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN. + * + * Context: Any context. + * Return: the token value for the function if implemented by this platform, + * otherwise RTAS_UNKNOWN_SERVICE. + */ +s32 rtas_function_token(const rtas_fn_handle_t handle) +{ + const size_t index = handle.index; + const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table); + + if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index)) + return RTAS_UNKNOWN_SERVICE; + /* + * Various drivers attempt token lookups on non-RTAS + * platforms. + */ + if (!rtas.dev) + return RTAS_UNKNOWN_SERVICE; + + return rtas_function_table[index].token; +} +EXPORT_SYMBOL_GPL(rtas_function_token); + +static int rtas_function_cmp(const void *a, const void *b) +{ + const struct rtas_function *f1 = a; + const struct rtas_function *f2 = b; + + return strcmp(f1->name, f2->name); +} + +/* + * Boot-time initialization of the function table needs the lookup to + * return a non-const-qualified object. Use rtas_name_to_function() + * in all other contexts. + */ +static struct rtas_function *__rtas_name_to_function(const char *name) +{ + const struct rtas_function key = { + .name = name, + }; + struct rtas_function *found; + + found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table), + sizeof(rtas_function_table[0]), rtas_function_cmp); + + return found; +} + +static const struct rtas_function *rtas_name_to_function(const char *name) +{ + return __rtas_name_to_function(name); +} + +static DEFINE_XARRAY(rtas_token_to_function_xarray); + +static int __init rtas_token_to_function_xarray_init(void) +{ + const struct rtas_function *func; + int err = 0; + + for_each_rtas_function(func) { + const s32 token = func->token; + + if (token == RTAS_UNKNOWN_SERVICE) + continue; + + err = xa_err(xa_store(&rtas_token_to_function_xarray, + token, (void *)func, GFP_KERNEL)); + if (err) + break; + } + + return err; +} +arch_initcall(rtas_token_to_function_xarray_init); + +/* + * For use by sys_rtas(), where the token value is provided by user + * space and we don't want to warn on failed lookups. + */ +static const struct rtas_function *rtas_token_to_function_untrusted(s32 token) +{ + return xa_load(&rtas_token_to_function_xarray, token); +} + +/* + * Reverse lookup for deriving the function descriptor from a + * known-good token value in contexts where the former is not already + * available. @token must be valid, e.g. derived from the result of a + * prior lookup against the function table. 
+ */ +static const struct rtas_function *rtas_token_to_function(s32 token) +{ + const struct rtas_function *func; + + if (WARN_ONCE(token < 0, "invalid token %d", token)) + return NULL; + + func = rtas_token_to_function_untrusted(token); + if (func) + return func; + /* + * Fall back to linear scan in case the reverse mapping hasn't + * been initialized yet. + */ + if (xa_empty(&rtas_token_to_function_xarray)) { + for_each_rtas_function(func) { + if (func->token == token) + return func; + } + } + + WARN_ONCE(true, "unexpected failed lookup for token %d", token); + return NULL; +} /* This is here deliberately so it's only used in this file */ void enter_rtas(unsigned long); -static inline void do_enter_rtas(unsigned long args) +static void __do_enter_rtas(struct rtas_args *args) { - unsigned long msr; + enter_rtas(__pa(args)); + srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ +} + +static void __do_enter_rtas_trace(struct rtas_args *args) +{ + const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token)); /* + * If there is a per-function lock, it must be held by the + * caller. + */ + if (func->lock) + lockdep_assert_held(func->lock); + + if (args == &rtas_args) + lockdep_assert_held(&rtas_lock); + + trace_rtas_input(args, func->name); + trace_rtas_ll_entry(args); + + __do_enter_rtas(args); + + trace_rtas_ll_exit(args); + trace_rtas_output(args, func->name); +} + +static void do_enter_rtas(struct rtas_args *args) +{ + const unsigned long msr = mfmsr(); + /* + * Situations where we want to skip any active tracepoints for + * safety reasons: + * + * 1. The last code executed on an offline CPU as it stops, + * i.e. we're about to call stop-self. The tracepoints' + * function name lookup uses xarray, which uses RCU, which + * isn't valid to call on an offline CPU. Any events + * emitted on an offline CPU will be discarded anyway. + * + * 2. In real mode, as when invoking ibm,nmi-interlock from + * the pseries MCE handler. We cannot count on trace + * buffers or the entries in rtas_token_to_function_xarray + * to be contained in the RMO. + */ + const unsigned long mask = MSR_IR | MSR_DR; + const bool can_trace = likely(cpu_online(raw_smp_processor_id()) && + (msr & mask) == mask); + /* * Make sure MSR[RI] is currently enabled as it will be forced later * in enter_rtas. */ - msr = mfmsr(); BUG_ON(!(msr & MSR_RI)); BUG_ON(!irqs_disabled()); hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */ - enter_rtas(args); - - srr_regs_clobbered(); /* rtas uses SRRs, invalidate */ + if (can_trace) + __do_enter_rtas_trace(args); + else + __do_enter_rtas(args); } -struct rtas_t rtas = { - .lock = __ARCH_SPIN_LOCK_UNLOCKED -}; -EXPORT_SYMBOL(rtas); +struct rtas_t rtas; DEFINE_SPINLOCK(rtas_data_buf_lock); -EXPORT_SYMBOL(rtas_data_buf_lock); +EXPORT_SYMBOL_GPL(rtas_data_buf_lock); -char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned; -EXPORT_SYMBOL(rtas_data_buf); +char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K); +EXPORT_SYMBOL_GPL(rtas_data_buf); unsigned long rtas_rmo_buf; @@ -86,29 +750,7 @@ unsigned long rtas_rmo_buf; * This is done like this so rtas_flash can be a module. 
*/ void (*rtas_flash_term_hook)(int); -EXPORT_SYMBOL(rtas_flash_term_hook); - -/* RTAS use home made raw locking instead of spin_lock_irqsave - * because those can be called from within really nasty contexts - * such as having the timebase stopped which would lockup with - * normal locks and spinlock debugging enabled - */ -static unsigned long lock_rtas(void) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - arch_spin_lock(&rtas.lock); - return flags; -} - -static void unlock_rtas(unsigned long flags) -{ - arch_spin_unlock(&rtas.lock); - local_irq_restore(flags); - preempt_enable(); -} +EXPORT_SYMBOL_GPL(rtas_flash_term_hook); /* * call_rtas_display_status and call_rtas_display_status_delay @@ -117,14 +759,14 @@ static void unlock_rtas(unsigned long flags) */ static void call_rtas_display_status(unsigned char c) { - unsigned long s; + unsigned long flags; if (!rtas.base) return; - s = lock_rtas(); - rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c); - unlock_rtas(s); + raw_spin_lock_irqsave(&rtas_lock, flags); + rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c); + raw_spin_unlock_irqrestore(&rtas_lock, flags); } static void call_rtas_display_status_delay(char c) @@ -132,7 +774,7 @@ static void call_rtas_display_status_delay(char c) static int pending_newline = 0; /* did last write end with unprinted newline? */ static int width = 16; - if (c == '\n') { + if (c == '\n') { while (width-- > 0) call_rtas_display_status(' '); width = 16; @@ -142,7 +784,7 @@ static void call_rtas_display_status_delay(char c) if (pending_newline) { call_rtas_display_status('\r'); call_rtas_display_status('\n'); - } + } pending_newline = 0; if (width--) { call_rtas_display_status(c); @@ -156,66 +798,6 @@ void __init udbg_init_rtas_panel(void) udbg_putc = call_rtas_display_status_delay; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - -/* If you think you're dying before early_init_dt_scan_rtas() does its - * work, you can hard code the token values for your firmware here and - * hardcode rtas.base/entry etc. 
- */ -static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE; -static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE; - -static void udbg_rtascon_putc(char c) -{ - int tries; - - if (!rtas.base) - return; - - /* Add CRs before LFs */ - if (c == '\n') - udbg_rtascon_putc('\r'); - - /* if there is more than one character to be displayed, wait a bit */ - for (tries = 0; tries < 16; tries++) { - if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0) - break; - udelay(1000); - } -} - -static int udbg_rtascon_getc_poll(void) -{ - int c; - - if (!rtas.base) - return -1; - - if (rtas_call(rtas_getchar_token, 0, 2, &c)) - return -1; - - return c; -} - -static int udbg_rtascon_getc(void) -{ - int c; - - while ((c = udbg_rtascon_getc_poll()) == -1) - ; - - return c; -} - - -void __init udbg_init_rtas_console(void) -{ - udbg_putc = udbg_rtascon_putc; - udbg_getc = udbg_rtascon_getc; - udbg_getc_poll = udbg_rtascon_getc_poll; -} -#endif /* CONFIG_UDBG_RTAS_CONSOLE */ - void rtas_progress(char *s, unsigned short hex) { struct device_node *root; @@ -248,8 +830,8 @@ void rtas_progress(char *s, unsigned short hex) "ibm,display-truncation-length", NULL); of_node_put(root); } - display_character = rtas_token("display-character"); - set_indicator = rtas_token("set-indicator"); + display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER); + set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR); } if (display_character == RTAS_UNKNOWN_SERVICE) { @@ -282,7 +864,7 @@ void rtas_progress(char *s, unsigned short hex) else rtas_call(display_character, 1, 1, NULL, '\r'); } - + if (row_width) width = row_width[current_line]; else @@ -302,9 +884,9 @@ void rtas_progress(char *s, unsigned short hex) spin_unlock(&progress_lock); return; } - + /* RTAS wants CR-LF, not just LF */ - + if (*os == '\n') { rtas_call(display_character, 1, 1, NULL, '\r'); rtas_call(display_character, 1, 1, NULL, '\n'); @@ -314,7 +896,7 @@ void rtas_progress(char *s, unsigned short hex) */ rtas_call(display_character, 1, 1, NULL, *os); } - + if (row_width) width = row_width[current_line]; else @@ -323,36 +905,49 @@ void rtas_progress(char *s, unsigned short hex) width--; rtas_call(display_character, 1, 1, NULL, *os); } - + os++; - + /* if we overwrite the screen length */ if (width <= 0) while ((*os != 0) && (*os != '\n') && (*os != '\r')) os++; } - + spin_unlock(&progress_lock); } -EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */ +EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */ int rtas_token(const char *service) { + const struct rtas_function *func; const __be32 *tokp; + if (rtas.dev == NULL) return RTAS_UNKNOWN_SERVICE; + + func = rtas_name_to_function(service); + if (func) + return func->token; + /* + * The caller is looking up a name that is not known to be an + * RTAS function. Either it's a function that needs to be + * added to the table, or they're misusing rtas_token() to + * access non-function properties of the /rtas node. Warn and + * fall back to the legacy behavior. + */ + WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n", + service); + tokp = of_get_property(rtas.dev, service, NULL); return tokp ? 
be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE; } -EXPORT_SYMBOL(rtas_token); - -int rtas_service_present(const char *service) -{ - return rtas_token(service) != RTAS_UNKNOWN_SERVICE; -} -EXPORT_SYMBOL(rtas_service_present); +EXPORT_SYMBOL_GPL(rtas_token); #ifdef CONFIG_RTAS_ERROR_LOGGING + +static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX; + /* * Return the firmware-specified size of the error log buffer * for all rtas calls that require an error buffer argument. @@ -360,56 +955,66 @@ EXPORT_SYMBOL(rtas_service_present); */ int rtas_get_error_log_max(void) { - static int rtas_error_log_max; - if (rtas_error_log_max) - return rtas_error_log_max; - - rtas_error_log_max = rtas_token ("rtas-error-log-max"); - if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) || - (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) { - printk (KERN_WARNING "RTAS: bad log buffer size %d\n", - rtas_error_log_max); - rtas_error_log_max = RTAS_ERROR_LOG_MAX; - } return rtas_error_log_max; } -EXPORT_SYMBOL(rtas_get_error_log_max); + +static void __init init_error_log_max(void) +{ + static const char propname[] __initconst = "rtas-error-log-max"; + u32 max; + + if (of_property_read_u32(rtas.dev, propname, &max)) { + pr_warn("%s not found, using default of %u\n", + propname, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + if (max > RTAS_ERROR_LOG_MAX) { + pr_warn("%s = %u, clamping max error log size to %u\n", + propname, max, RTAS_ERROR_LOG_MAX); + max = RTAS_ERROR_LOG_MAX; + } + + rtas_error_log_max = max; +} static char rtas_err_buf[RTAS_ERROR_LOG_MAX]; -static int rtas_last_error_token; /** Return a copy of the detailed error text associated with the * most recent failed call to rtas. Because the error text * might go stale if there are any other intervening rtas calls, * this routine must be called atomically with whatever produced - * the error (i.e. with rtas.lock still held from the previous call). + * the error (i.e. with rtas_lock still held from the previous call). */ static char *__fetch_rtas_last_error(char *altbuf) { + const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR); struct rtas_args err_args, save_args; u32 bufsz; char *buf = NULL; - if (rtas_last_error_token == -1) + lockdep_assert_held(&rtas_lock); + + if (token == -1) return NULL; bufsz = rtas_get_error_log_max(); - err_args.token = cpu_to_be32(rtas_last_error_token); + err_args.token = cpu_to_be32(token); err_args.nargs = cpu_to_be32(2); err_args.nret = cpu_to_be32(1); err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf)); err_args.args[1] = cpu_to_be32(bufsz); err_args.args[2] = 0; - save_args = rtas.args; - rtas.args = err_args; + save_args = rtas_args; + rtas_args = err_args; - do_enter_rtas(__pa(&rtas.args)); + do_enter_rtas(&rtas_args); - err_args = rtas.args; - rtas.args = save_args; + err_args = rtas_args; + rtas_args = save_args; /* Log the error in the unlikely case that there was one. 
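 * (With nargs=2 and nret=1 above, the call status rets[0] aliases
 * args[2], which is what the test below checks.)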
*/ if (unlikely(err_args.args[2] == 0)) { @@ -421,7 +1026,7 @@ static char *__fetch_rtas_last_error(char *altbuf) buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); } if (buf) - memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); + memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); } return buf; @@ -432,6 +1037,7 @@ static char *__fetch_rtas_last_error(char *altbuf) #else /* CONFIG_RTAS_ERROR_LOGGING */ #define __fetch_rtas_last_error(x) NULL #define get_errorlog_buffer() NULL +static void __init init_error_log_max(void) {} #endif @@ -452,9 +1058,26 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, for (i = 0; i < nret; ++i) args->rets[i] = 0; - do_enter_rtas(__pa(args)); + do_enter_rtas(args); } +/** + * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization. + * @args: RTAS parameter block to be used for the call, must obey RTAS addressing + * constraints. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_function_token(). + * + * This function is similar to rtas_call(), but must be used with a + * limited set of RTAS calls specifically exempted from the general + * requirement that only one RTAS call may be in progress at any + * time. Examples include stop-self and ibm,nmi-interlock. + */ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) { va_list list; @@ -464,43 +1087,120 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, va_end(list); } +static bool token_is_restricted_errinjct(s32 token) +{ + return token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) || + token == rtas_function_token(RTAS_FN_IBM_ERRINJCT); +} + +/** + * rtas_call() - Invoke an RTAS firmware function. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @outputs: Array of @nret output words. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_function_token(). + * + * The @nargs and @nret arguments must match the number of input and + * output parameters specified for the RTAS function. + * + * rtas_call() returns RTAS status codes, not conventional Linux errno + * values. Callers must translate any failure to an appropriate errno + * in syscall context. Most callers of RTAS functions that can return + * -2 or 990x should use rtas_busy_delay() to correctly handle those + * statuses before calling again. + * + * The return value descriptions are adapted from 7.2.8 [RTAS] Return + * Codes of the PAPR and CHRP specifications. + * + * Context: Process context preferably, interrupt context if + * necessary. Acquires an internal spinlock and may perform + * GFP_ATOMIC slab allocation in error path. Unsafe for NMI + * context. + * Return: + * * 0 - RTAS function call succeeded. + * * -1 - RTAS function encountered a hardware or + * platform error, or the token is invalid, + * or the function is restricted by kernel policy. + * * -2 - Specs say "A necessary hardware device was busy, + * and the requested function could not be + * performed. The operation should be retried at + * a later time." 
This is misleading, at least with + * respect to current RTAS implementations. What it + * usually means in practice is that the function + * could not be completed while meeting RTAS's + * deadline for returning control to the OS (250us + * for PAPR/PowerVM, typically), but the call may be + * immediately reattempted to resume work on it. + * * -3 - Parameter error. + * * -7 - Unexpected state change. + * * 9000...9899 - Vendor-specific success codes. + * * 9900...9905 - Advisory extended delay. Caller should try + * again after ~10^x ms has elapsed, where x is + * the last digit of the status [0-5]. Again going + * beyond the PAPR text, 990x on PowerVM indicates + * contention for RTAS-internal resources. Other + * RTAS call sequences in progress should be + * allowed to complete before reattempting the + * call. + * * -9000 - Multi-level isolation error. + * * -9999...-9004 - Vendor-specific error codes. + * * Additional negative values - Function-specific error. + * * Additional positive values - Function-specific success. + */ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { + struct pin_cookie cookie; va_list list; int i; - unsigned long s; - struct rtas_args *rtas_args; + unsigned long flags; + struct rtas_args *args; char *buff_copy = NULL; int ret; if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) return -1; + if (token_is_restricted_errinjct(token)) { + /* + * It would be nicer to not discard the error value + * from security_locked_down(), but callers expect an + * RTAS status, not an errno. + */ + if (security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION)) + return -1; + } + if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) { WARN_ON_ONCE(1); return -1; } - s = lock_rtas(); + raw_spin_lock_irqsave(&rtas_lock, flags); + cookie = lockdep_pin_lock(&rtas_lock); /* We use the global rtas args buffer */ - rtas_args = &rtas.args; + args = &rtas_args; va_start(list, outputs); - va_rtas_call_unlocked(rtas_args, token, nargs, nret, list); + va_rtas_call_unlocked(args, token, nargs, nret, list); va_end(list); /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ - if (be32_to_cpu(rtas_args->rets[0]) == -1) + if (be32_to_cpu(args->rets[0]) == -1) buff_copy = __fetch_rtas_last_error(NULL); if (nret > 1 && outputs != NULL) for (i = 0; i < nret-1; ++i) - outputs[i] = be32_to_cpu(rtas_args->rets[i+1]); - ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0; + outputs[i] = be32_to_cpu(args->rets[i + 1]); + ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0; - unlock_rtas(s); + lockdep_unpin_lock(&rtas_lock, cookie); + raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0); @@ -509,7 +1209,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) } return ret; } -EXPORT_SYMBOL(rtas_call); +EXPORT_SYMBOL_GPL(rtas_call); /** * rtas_busy_delay_time() - From an RTAS status value, calculate the @@ -547,7 +1247,47 @@ unsigned int rtas_busy_delay_time(int status) return ms; } -EXPORT_SYMBOL(rtas_busy_delay_time); + +/* + * Early boot fallback for rtas_busy_delay(). + */ +static bool __init rtas_busy_delay_early(int status) +{ + static size_t successive_ext_delays __initdata; + bool retry; + + switch (status) { + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + /* + * In the unlikely case that we receive an extended + * delay status in early boot, the OS is probably not + * the cause, and there's nothing we can do to clear + * the condition. 
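+	 * (Timed sleeps are not available this early either; see the
+	 * system_state check in rtas_busy_delay().)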
Best we can do is delay for a bit + * and hope it's transient. Lie to the caller if it + * seems like we're stuck in a retry loop. + */ + mdelay(1); + retry = true; + successive_ext_delays += 1; + if (successive_ext_delays > 1000) { + pr_err("too many extended delays, giving up\n"); + dump_stack(); + retry = false; + successive_ext_delays = 0; + } + break; + case RTAS_BUSY: + retry = true; + successive_ext_delays = 0; + break; + default: + retry = false; + successive_ext_delays = 0; + break; + } + + return retry; +} /** * rtas_busy_delay() - helper for RTAS busy and extended delay statuses @@ -567,11 +1307,17 @@ EXPORT_SYMBOL(rtas_busy_delay_time); * * false - @status is not @RTAS_BUSY nor an extended delay hint. The * caller is responsible for handling @status. */ -bool rtas_busy_delay(int status) +bool __ref rtas_busy_delay(int status) { unsigned int ms; bool ret; + /* + * Can't do timed sleeps before timekeeping is up. + */ + if (system_state < SYSTEM_SCHEDULING) + return rtas_busy_delay_early(status); + switch (status) { case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: ret = true; @@ -584,21 +1330,14 @@ bool rtas_busy_delay(int status) */ ms = clamp(ms, 1U, 1000U); /* - * The delay hint is an order-of-magnitude suggestion, not - * a minimum. It is fine, possibly even advantageous, for - * us to pause for less time than hinted. For small values, - * use usleep_range() to ensure we don't sleep much longer - * than actually needed. - * - * See Documentation/timers/timers-howto.rst for - * explanation of the threshold used here. In effect we use - * usleep_range() for 9900 and 9901, msleep() for - * 9902-9905. + * The delay hint is an order-of-magnitude suggestion, not a + * minimum. It is fine, possibly even advantageous, for us to + * pause for less time than hinted. To make sure pause time will + * not be way longer than requested independent of HZ + * configuration, use fsleep(). See fsleep() for details of + * used sleeping functions. 
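+	 * (fsleep() takes microseconds, hence the ms * 1000 below.)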
*/ - if (ms <= 20) - usleep_range(ms * 100, ms * 1000); - else - msleep(ms); + fsleep(ms * 1000); break; case RTAS_BUSY: ret = true; @@ -621,40 +1360,40 @@ bool rtas_busy_delay(int status) return ret; } -EXPORT_SYMBOL(rtas_busy_delay); +EXPORT_SYMBOL_GPL(rtas_busy_delay); -static int rtas_error_rc(int rtas_rc) +int rtas_error_rc(int rtas_rc) { int rc; switch (rtas_rc) { - case -1: /* Hardware Error */ - rc = -EIO; - break; - case -3: /* Bad indicator/domain/etc */ - rc = -EINVAL; - break; - case -9000: /* Isolation error */ - rc = -EFAULT; - break; - case -9001: /* Outstanding TCE/PTE */ - rc = -EEXIST; - break; - case -9002: /* No usable slot */ - rc = -ENODEV; - break; - default: - printk(KERN_ERR "%s: unexpected RTAS error %d\n", - __func__, rtas_rc); - rc = -ERANGE; - break; + case RTAS_HARDWARE_ERROR: /* Hardware Error */ + rc = -EIO; + break; + case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case -9000: /* Isolation error */ + rc = -EFAULT; + break; + case -9001: /* Outstanding TCE/PTE */ + rc = -EEXIST; + break; + case -9002: /* No usable slot */ + rc = -ENODEV; + break; + default: + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); + rc = -ERANGE; + break; } return rc; } +EXPORT_SYMBOL_GPL(rtas_error_rc); int rtas_get_power_level(int powerdomain, int *level) { - int token = rtas_token("get-power-level"); + int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -667,11 +1406,11 @@ int rtas_get_power_level(int powerdomain, int *level) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_power_level); +EXPORT_SYMBOL_GPL(rtas_get_power_level); int rtas_set_power_level(int powerdomain, int level, int *setlevel) { - int token = rtas_token("set-power-level"); + int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -685,11 +1424,11 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_power_level); +EXPORT_SYMBOL_GPL(rtas_set_power_level); int rtas_get_sensor(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -703,11 +1442,11 @@ int rtas_get_sensor(int sensor, int index, int *state) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_get_sensor); +EXPORT_SYMBOL_GPL(rtas_get_sensor); int rtas_get_sensor_fast(int sensor, int index, int *state) { - int token = rtas_token("get-sensor-state"); + int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -746,11 +1485,10 @@ bool rtas_indicator_present(int token, int *maxindex) return false; } -EXPORT_SYMBOL(rtas_indicator_present); int rtas_set_indicator(int indicator, int index, int new_value) { - int token = rtas_token("set-indicator"); + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; if (token == RTAS_UNKNOWN_SERVICE) @@ -764,15 +1502,15 @@ int rtas_set_indicator(int indicator, int index, int new_value) return rtas_error_rc(rc); return rc; } -EXPORT_SYMBOL(rtas_set_indicator); +EXPORT_SYMBOL_GPL(rtas_set_indicator); /* * Ignoring RTAS extended delay */ int rtas_set_indicator_fast(int indicator, int index, int new_value) { + int token = rtas_function_token(RTAS_FN_SET_INDICATOR); int rc; - int token = rtas_token("set-indicator"); if (token == RTAS_UNKNOWN_SERVICE) return -ENOENT; @@ -814,10 +1552,11 @@ int 
rtas_set_indicator_fast(int indicator, int index, int new_value) */ int rtas_ibm_suspend_me(int *fw_status) { + int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME); int fwrc; int ret; - fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL); + fwrc = rtas_call(token, 0, 1, NULL); switch (fwrc) { case 0: @@ -849,8 +1588,8 @@ void __noreturn rtas_restart(char *cmd) { if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_RESTART); - printk("RTAS system-reboot returned %d\n", - rtas_call(rtas_token("system-reboot"), 0, 1, NULL)); + pr_emerg("system-reboot returned %d\n", + rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL)); for (;;); } @@ -859,8 +1598,8 @@ void rtas_power_off(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_POWER_OFF); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } @@ -869,16 +1608,19 @@ void __noreturn rtas_halt(void) if (rtas_flash_term_hook) rtas_flash_term_hook(SYS_HALT); /* allow power on only with power button press */ - printk("RTAS power-off returned %d\n", - rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1)); + pr_emerg("power-off returned %d\n", + rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1)); for (;;); } /* Must be in the RMO region, so we place it here */ static char rtas_os_term_buf[2048]; +static bool ibm_extended_os_term; void rtas_os_term(char *str) { + s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM); + static struct rtas_args args; int status; /* @@ -887,19 +1629,24 @@ void rtas_os_term(char *str) * this property may terminate the partition which we want to avoid * since it interferes with panic_timeout. */ - if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") || - RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term")) + + if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term) return; snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str); + /* + * Keep calling as long as RTAS returns a "try again" status, + * but don't use rtas_busy_delay(), which potentially + * schedules. 
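+	 * rtas_busy_delay_time() serves purely as a "worth retrying"
+	 * predicate here; it computes a suggested wait but never sleeps.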
+ */ do { - status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL, - __pa(rtas_os_term_buf)); - } while (rtas_busy_delay(status)); + rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf)); + status = be32_to_cpu(args.rets[0]); + } while (rtas_busy_delay_time(status)); if (status != 0) - printk(KERN_EMERG "ibm,os-term call failed %d\n", status); + pr_emerg("ibm,os-term call failed %d\n", status); } /** @@ -915,76 +1662,26 @@ void rtas_os_term(char *str) */ void rtas_activate_firmware(void) { - int token; + int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE); int fwrc; - token = rtas_token("ibm,activate-firmware"); if (token == RTAS_UNKNOWN_SERVICE) { pr_notice("ibm,activate-firmware method unavailable\n"); return; } + mutex_lock(&rtas_ibm_activate_firmware_lock); + do { fwrc = rtas_call(token, 0, 1, NULL); } while (rtas_busy_delay(fwrc)); + mutex_unlock(&rtas_ibm_activate_firmware_lock); + if (fwrc) pr_err("ibm,activate-firmware failed (%i)\n", fwrc); } -#ifdef CONFIG_PPC_PSERIES -/** - * rtas_call_reentrant() - Used for reentrant rtas calls - * @token: Token for desired reentrant RTAS call - * @nargs: Number of Input Parameters - * @nret: Number of Output Parameters - * @outputs: Array of outputs - * @...: Inputs for desired RTAS call - * - * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off", - * "ibm,get-xive" and "ibm,set-xive" are currently reentrant. - * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but - * PACA one instead. - * - * Return: -1 on error, - * First output value of RTAS call if (nret > 0), - * 0 otherwise, - */ -int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) -{ - va_list list; - struct rtas_args *args; - unsigned long flags; - int i, ret = 0; - - if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE) - return -1; - - local_irq_save(flags); - preempt_disable(); - - /* We use the per-cpu (PACA) rtas args buffer */ - args = local_paca->rtas_args_reentrant; - - va_start(list, outputs); - va_rtas_call_unlocked(args, token, nargs, nret, list); - va_end(list); - - if (nret > 1 && outputs) - for (i = 0; i < nret - 1; ++i) - outputs[i] = be32_to_cpu(args->rets[i + 1]); - - if (nret > 0) - ret = be32_to_cpu(args->rets[0]); - - local_irq_restore(flags); - preempt_enable(); - - return ret; -} - -#endif /* CONFIG_PPC_PSERIES */ - /** * get_pseries_errorlog() - Find a specific pseries error log in an RTAS * extended event log. @@ -1023,8 +1720,6 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log return NULL; } -#ifdef CONFIG_PPC_RTAS_FILTER - /* * The sys_rtas syscall, as originally designed, allows root to pass * arbitrary physical addresses to RTAS calls. A number of RTAS calls @@ -1038,56 +1733,12 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log * * Accordingly, we filter RTAS requests to check that the call is * permitted, and that provided pointers fall within the RMO buffer. - * The rtas_filters list contains an entry for each permitted call, - * with the indexes of the parameters which are expected to contain - * addresses and sizes of buffers allocated inside the RMO buffer. + * If a function is allowed to be invoked via the syscall, then its + * entry in the rtas_functions table points to a rtas_filter that + * describes its constraints, with the indexes of the parameters which + * are expected to contain addresses and sizes of buffers allocated + * inside the RMO buffer. 
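+ * Functions with no filter attached are rejected out of hand by
+ * block_rtas_call().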
*/ -struct rtas_filter { - const char *name; - int token; - /* Indexes into the args buffer, -1 if not used */ - int buf_idx1; - int size_idx1; - int buf_idx2; - int size_idx2; - - int fixed_size; -}; - -static struct rtas_filter rtas_filters[] __ro_after_init = { - { "ibm,activate-firmware", -1, -1, -1, -1, -1 }, - { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */ - { "display-character", -1, -1, -1, -1, -1 }, - { "ibm,display-message", -1, 0, -1, -1, -1 }, - { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 }, - { "ibm,close-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,open-errinjct", -1, -1, -1, -1, -1 }, - { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 }, - { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 }, - { "ibm,get-indices", -1, 2, 3, -1, -1 }, - { "get-power-level", -1, -1, -1, -1, -1 }, - { "get-sensor-state", -1, -1, -1, -1, -1 }, - { "ibm,get-system-parameter", -1, 1, 2, -1, -1 }, - { "get-time-of-day", -1, -1, -1, -1, -1 }, - { "ibm,get-vpd", -1, 0, -1, 1, 2 }, - { "ibm,lpar-perftools", -1, 2, 3, -1, -1 }, - { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */ - { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 }, - { "ibm,scan-log-dump", -1, 0, 1, -1, -1 }, - { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 }, - { "ibm,set-eeh-option", -1, -1, -1, -1, -1 }, - { "set-indicator", -1, -1, -1, -1, -1 }, - { "set-power-level", -1, -1, -1, -1, -1 }, - { "set-time-for-power-on", -1, -1, -1, -1, -1 }, - { "ibm,set-system-parameter", -1, 1, -1, -1, -1 }, - { "set-time-of-day", -1, -1, -1, -1, -1 }, -#ifdef CONFIG_CPU_BIG_ENDIAN - { "ibm,suspend-me", -1, -1, -1, -1, -1 }, - { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 }, - { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 }, -#endif - { "ibm,physical-attestation", -1, 0, 1, -1, -1 }, -}; static bool in_rmo_buf(u32 base, u32 end) { @@ -1098,98 +1749,84 @@ static bool in_rmo_buf(u32 base, u32 end) end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE); } -static bool block_rtas_call(int token, int nargs, +static bool block_rtas_call(const struct rtas_function *func, int nargs, struct rtas_args *args) { - int i; - - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) { - struct rtas_filter *f = &rtas_filters[i]; - u32 base, size, end; - - if (token != f->token) - continue; + const struct rtas_filter *f; + const bool is_platform_dump = + func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP]; + const bool is_config_conn = + func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR]; + u32 base, size, end; - if (f->buf_idx1 != -1) { - base = be32_to_cpu(args->args[f->buf_idx1]); - if (f->size_idx1 != -1) - size = be32_to_cpu(args->args[f->size_idx1]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - - end = base + size - 1; + /* + * Only functions with filters attached are allowed. + */ + f = func->filter; + if (!f) + goto err; + /* + * And some functions aren't allowed on LE. 
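+	 * (Judging by the old filter table above: ibm,suspend-me,
+	 * ibm,update-nodes and ibm,update-properties, which were only
+	 * registered under CONFIG_CPU_BIG_ENDIAN.)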
+ */ + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le) + goto err; + + if (f->buf_idx1 != -1) { + base = be32_to_cpu(args->args[f->buf_idx1]); + if (f->size_idx1 != -1) + size = be32_to_cpu(args->args[f->size_idx1]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; - /* - * Special case for ibm,platform-dump - NULL buffer - * address is used to indicate end of dump processing - */ - if (!strcmp(f->name, "ibm,platform-dump") && - base == 0) - return false; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,platform-dump - NULL buffer + * address is used to indicate end of dump processing + */ + if (is_platform_dump && base == 0) + return false; - if (f->buf_idx2 != -1) { - base = be32_to_cpu(args->args[f->buf_idx2]); - if (f->size_idx2 != -1) - size = be32_to_cpu(args->args[f->size_idx2]); - else if (f->fixed_size) - size = f->fixed_size; - else - size = 1; - end = base + size - 1; + if (!in_rmo_buf(base, end)) + goto err; + } - /* - * Special case for ibm,configure-connector where the - * address can be 0 - */ - if (!strcmp(f->name, "ibm,configure-connector") && - base == 0) - return false; + if (f->buf_idx2 != -1) { + base = be32_to_cpu(args->args[f->buf_idx2]); + if (f->size_idx2 != -1) + size = be32_to_cpu(args->args[f->size_idx2]); + else if (f->fixed_size) + size = f->fixed_size; + else + size = 1; + end = base + size - 1; - if (!in_rmo_buf(base, end)) - goto err; - } + /* + * Special case for ibm,configure-connector where the + * address can be 0 + */ + if (is_config_conn && base == 0) + return false; - return false; + if (!in_rmo_buf(base, end)) + goto err; } + return false; err: pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n"); - pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n", - token, nargs, current->comm); + pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n", + func->name, nargs, current->comm); return true; } -static void __init rtas_syscall_filter_init(void) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) - rtas_filters[i].token = rtas_token(rtas_filters[i].name); -} - -#else - -static bool block_rtas_call(int token, int nargs, - struct rtas_args *args) -{ - return false; -} - -static void __init rtas_syscall_filter_init(void) -{ -} - -#endif /* CONFIG_PPC_RTAS_FILTER */ - /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { + const struct rtas_function *func; + struct pin_cookie cookie; struct rtas_args args; unsigned long flags; char *buff_copy, *errbuf = NULL; @@ -1213,22 +1850,38 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; + nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args)); + nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs); + /* Copy in args. */ if (copy_from_user(args.args, uargs->args, nargs * sizeof(rtas_arg_t)) != 0) return -EFAULT; - if (token == RTAS_UNKNOWN_SERVICE) + /* + * If this token doesn't correspond to a function the kernel + * understands, you're not allowed to call it. 
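+	 * The "untrusted" lookup variant is used because the token value
+	 * comes straight from the user-supplied args block.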
+ */ + func = rtas_token_to_function_untrusted(token); + if (!func) return -EINVAL; args.rets = &args.args[nargs]; memset(args.rets, 0, nret * sizeof(rtas_arg_t)); - if (block_rtas_call(token, nargs, &args)) + if (block_rtas_call(func, nargs, &args)) return -EINVAL; + if (token_is_restricted_errinjct(token)) { + int err; + + err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION); + if (err) + return err; + } + /* Need to handle ibm,suspend_me call specially */ - if (token == rtas_token("ibm,suspend-me")) { + if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) { /* * rtas_ibm_suspend_me assumes the streamid handle is in cpu @@ -1249,18 +1902,32 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); - flags = lock_rtas(); + /* + * If this function has a mutex assigned to it, we must + * acquire it to avoid interleaving with any kernel-based uses + * of the same function. Kernel-based sequences acquire the + * appropriate mutex explicitly. + */ + if (func->lock) + mutex_lock(func->lock); + + raw_spin_lock_irqsave(&rtas_lock, flags); + cookie = lockdep_pin_lock(&rtas_lock); - rtas.args = args; - do_enter_rtas(__pa(&rtas.args)); - args = rtas.args; + rtas_args = args; + do_enter_rtas(&rtas_args); + args = rtas_args; /* A -1 return code indicates that the last command couldn't be completed due to a hardware error. */ if (be32_to_cpu(args.rets[0]) == -1) errbuf = __fetch_rtas_last_error(buff_copy); - unlock_rtas(flags); + lockdep_unpin_lock(&rtas_lock, cookie); + raw_spin_unlock_irqrestore(&rtas_lock, flags); + + if (func->lock) + mutex_unlock(func->lock); if (buff_copy) { if (errbuf) @@ -1278,6 +1945,54 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return 0; } +static void __init rtas_function_table_init(void) +{ + struct property *prop; + + for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) { + struct rtas_function *curr = &rtas_function_table[i]; + struct rtas_function *prior; + int cmp; + + curr->token = RTAS_UNKNOWN_SERVICE; + + if (i == 0) + continue; + /* + * Ensure table is sorted correctly for binary search + * on function names. + */ + prior = &rtas_function_table[i - 1]; + + cmp = strcmp(prior->name, curr->name); + if (cmp < 0) + continue; + + if (cmp == 0) { + pr_err("'%s' has duplicate function table entries\n", + curr->name); + } else { + pr_err("function table unsorted: '%s' wrongly precedes '%s'\n", + prior->name, curr->name); + } + } + + for_each_property_of_node(rtas.dev, prop) { + struct rtas_function *func; + + if (prop->length != sizeof(u32)) + continue; + + func = __rtas_name_to_function(prop->name); + if (!func) + continue; + + func->token = be32_to_cpup((__be32 *)prop->value); + + pr_debug("function %s has token %u\n", func->name, func->token); + } +} + /* * Call early during boot, before mem init, to retrieve the RTAS * information from the device-tree and allocate the RMO buffer for userland @@ -1309,6 +2024,17 @@ void __init rtas_initialize(void) no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); rtas.entry = no_entry ? rtas.base : entry; + init_error_log_max(); + + /* Must be called before any function token lookups */ + rtas_function_table_init(); + + /* + * Discover this now to avoid a device tree lookup in the + * panic path. 
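+	 * (rtas_os_term() consults ibm_extended_os_term and can run in
+	 * panic context, where walking the device tree is best avoided.)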
+ */ + ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term"); + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ @@ -1322,11 +2048,7 @@ void __init rtas_initialize(void) panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n", PAGE_SIZE, &rtas_region); -#ifdef CONFIG_RTAS_ERROR_LOGGING - rtas_last_error_token = rtas_token("rtas-last-error"); -#endif - - rtas_syscall_filter_init(); + rtas_work_area_reserve_arena(rtas_region); } int __init early_init_dt_scan_rtas(unsigned long node, @@ -1353,42 +2075,26 @@ int __init early_init_dt_scan_rtas(unsigned long node, rtas.size = *sizep; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - basep = of_get_flat_dt_prop(node, "put-term-char", NULL); - if (basep) - rtas_putchar_token = *basep; - - basep = of_get_flat_dt_prop(node, "get-term-char", NULL); - if (basep) - rtas_getchar_token = *basep; - - if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE && - rtas_getchar_token != RTAS_UNKNOWN_SERVICE) - udbg_init_rtas_console(); - -#endif - /* break now */ return 1; } -static arch_spinlock_t timebase_lock; +static DEFINE_RAW_SPINLOCK(timebase_lock); static u64 timebase = 0; void rtas_give_timebase(void) { unsigned long flags; - local_irq_save(flags); + raw_spin_lock_irqsave(&timebase_lock, flags); hard_irq_disable(); - arch_spin_lock(&timebase_lock); - rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL); timebase = get_tb(); - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); while (timebase) barrier(); - rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL); + rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL); local_irq_restore(flags); } @@ -1396,8 +2102,8 @@ void rtas_take_timebase(void) { while (!timebase) barrier(); - arch_spin_lock(&timebase_lock); + raw_spin_lock(&timebase_lock); set_tb(timebase >> 32, timebase & 0xffffffff); timebase = 0; - arch_spin_unlock(&timebase_lock); + raw_spin_unlock(&timebase_lock); } diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S index 9a434d42e660..6ce95ddadbcd 100644 --- a/arch/powerpc/kernel/rtas_entry.S +++ b/arch/powerpc/kernel/rtas_entry.S @@ -109,8 +109,12 @@ __enter_rtas: * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S] * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if * MSR[S] is set, it will remain when entering RTAS. + * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV + * from the saved MSR value and insert into the value RTAS will use. 
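+ * extrdi pulls the lone MSR_HV bit out of the saved value in r6;
+ * insrdi then folds it into the fresh MSR_ME|MSR_RI value below.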
*/ + extrdi r0, r6, 1, 63 - MSR_HV_LG LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI) + insrdi r6, r0, 1, 63 - MSR_HV_LG li r0,0 mtmsrd r0,1 /* disable RI before using SRR0/1 */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index bc817a5619d6..583dc16e9d3c 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -312,13 +312,13 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, { struct rtas_update_flash_t *const uf = &rtas_update_flash_data; char *p; - int next_free, rc; + int next_free; struct flash_block_list *fl; - mutex_lock(&rtas_update_flash_mutex); + guard(mutex)(&rtas_update_flash_mutex); if (uf->status == FLASH_AUTH || count == 0) - goto out; /* discard data */ + return count; /* discard data */ /* In the case that the image is not ready for flashing, the memory * allocated for the block list will be freed upon the release of the @@ -327,7 +327,7 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, if (uf->flist == NULL) { uf->flist = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL); if (!uf->flist) - goto nomem; + return -ENOMEM; } fl = uf->flist; @@ -338,7 +338,7 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, /* Need to allocate another block_list */ fl->next = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL); if (!fl->next) - goto nomem; + return -ENOMEM; fl = fl->next; next_free = 0; } @@ -347,25 +347,17 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer, count = RTAS_BLK_SIZE; p = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL); if (!p) - goto nomem; + return -ENOMEM; if(copy_from_user(p, buffer, count)) { kmem_cache_free(flash_block_cache, p); - rc = -EFAULT; - goto error; + return -EFAULT; } fl->blocks[next_free].data = p; fl->blocks[next_free].length = count; fl->num_blocks++; -out: - mutex_unlock(&rtas_update_flash_mutex); - return count; -nomem: - rc = -ENOMEM; -error: - mutex_unlock(&rtas_update_flash_mutex); - return rc; + return count; } /* @@ -376,7 +368,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf, unsigned int op) s32 rc; do { - rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, 1, + rc = rtas_call(rtas_function_token(RTAS_FN_IBM_MANAGE_FLASH_IMAGE), 1, 1, NULL, op); } while (rtas_busy_delay(rc)); @@ -405,19 +397,18 @@ static ssize_t manage_flash_write(struct file *file, const char __user *buf, static const char reject_str[] = "0"; static const char commit_str[] = "1"; char stkbuf[10]; - int op, rc; + int op; - mutex_lock(&rtas_manage_flash_mutex); + guard(mutex)(&rtas_manage_flash_mutex); if ((args_buf->status == MANAGE_AUTH) || (count == 0)) - goto out; + return count; op = -1; if (buf) { if (count > 9) count = 9; - rc = -EFAULT; if (copy_from_user (stkbuf, buf, count)) - goto error; + return -EFAULT; if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0) op = RTAS_REJECT_TMP_IMG; else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0) @@ -425,18 +416,11 @@ static ssize_t manage_flash_write(struct file *file, const char __user *buf, } if (op == -1) { /* buf is empty, or contains invalid string */ - rc = -EINVAL; - goto error; + return -EINVAL; } manage_flash(args_buf, op); -out: - mutex_unlock(&rtas_manage_flash_mutex); return count; - -error: - mutex_unlock(&rtas_manage_flash_mutex); - return rc; } /* @@ -444,7 +428,7 @@ error: */ static void validate_flash(struct rtas_validate_flash_t *args_buf) { - int token = 
rtas_token("ibm,validate-flash-image"); + int token = rtas_function_token(RTAS_FN_IBM_VALIDATE_FLASH_IMAGE); int update_results; s32 rc; @@ -499,16 +483,14 @@ static ssize_t validate_flash_write(struct file *file, const char __user *buf, { struct rtas_validate_flash_t *const args_buf = &rtas_validate_flash_data; - int rc; - mutex_lock(&rtas_validate_flash_mutex); + guard(mutex)(&rtas_validate_flash_mutex); /* We are only interested in the first 4K of the * candidate image */ if ((*off >= VALIDATE_BUF_SIZE) || (args_buf->status == VALIDATE_AUTH)) { *off += count; - mutex_unlock(&rtas_validate_flash_mutex); return count; } @@ -519,20 +501,14 @@ static ssize_t validate_flash_write(struct file *file, const char __user *buf, args_buf->status = VALIDATE_INCOMPLETE; } - if (!access_ok(buf, count)) { - rc = -EFAULT; - goto done; - } - if (copy_from_user(args_buf->buf + *off, buf, count)) { - rc = -EFAULT; - goto done; - } + if (!access_ok(buf, count)) + return -EFAULT; + + if (copy_from_user(args_buf->buf + *off, buf, count)) + return -EFAULT; *off += count; - rc = count; -done: - mutex_unlock(&rtas_validate_flash_mutex); - return rc; + return count; } static int validate_flash_release(struct inode *inode, struct file *file) @@ -570,7 +546,7 @@ static void rtas_flash_firmware(int reboot_type) return; } - update_token = rtas_token("ibm,update-flash-64-and-reboot"); + update_token = rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT); if (update_token == RTAS_UNKNOWN_SERVICE) { printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot " "is not available -- not a service partition?\n"); @@ -653,7 +629,7 @@ static void rtas_flash_firmware(int reboot_type) */ struct rtas_flash_file { const char *filename; - const char *rtas_call_name; + const rtas_fn_handle_t handle; int *status; const struct proc_ops ops; }; @@ -661,7 +637,7 @@ struct rtas_flash_file { static const struct rtas_flash_file rtas_flash_files[] = { { .filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, .status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_msg, .ops.proc_write = rtas_flash_write, @@ -670,7 +646,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME, - .rtas_call_name = "ibm,update-flash-64-and-reboot", + .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT, .status = &rtas_update_flash_data.status, .ops.proc_read = rtas_flash_read_num, .ops.proc_write = rtas_flash_write, @@ -679,7 +655,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" VALIDATE_FLASH_NAME, - .rtas_call_name = "ibm,validate-flash-image", + .handle = RTAS_FN_IBM_VALIDATE_FLASH_IMAGE, .status = &rtas_validate_flash_data.status, .ops.proc_read = validate_flash_read, .ops.proc_write = validate_flash_write, @@ -688,7 +664,7 @@ static const struct rtas_flash_file rtas_flash_files[] = { }, { .filename = "powerpc/rtas/" MANAGE_FLASH_NAME, - .rtas_call_name = "ibm,manage-flash-image", + .handle = RTAS_FN_IBM_MANAGE_FLASH_IMAGE, .status = &rtas_manage_flash_data.status, .ops.proc_read = manage_flash_read, .ops.proc_write = manage_flash_write, @@ -700,8 +676,7 @@ static int __init rtas_flash_init(void) { int i; - if (rtas_token("ibm,update-flash-64-and-reboot") == - RTAS_UNKNOWN_SERVICE) { + if (rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT) == RTAS_UNKNOWN_SERVICE) { pr_info("rtas_flash: no firmware flash support\n"); 
return -EINVAL; } @@ -710,9 +685,9 @@ static int __init rtas_flash_init(void) if (!rtas_validate_flash_data.buf) return -ENOMEM; - flash_block_cache = kmem_cache_create("rtas_flash_cache", - RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0, - NULL); + flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache", + RTAS_BLK_SIZE, RTAS_BLK_SIZE, + 0, 0, RTAS_BLK_SIZE, NULL); if (!flash_block_cache) { printk(KERN_ERR "%s: failed to create block cache\n", __func__); @@ -730,7 +705,7 @@ static int __init rtas_flash_init(void) * This code assumes that the status int is the first member of the * struct */ - token = rtas_token(f->rtas_call_name); + token = rtas_function_token(f->handle); if (token == RTAS_UNKNOWN_SERVICE) *f->status = FLASH_AUTH; else @@ -774,4 +749,5 @@ static void __exit rtas_flash_cleanup(void) module_init(rtas_flash_init); module_exit(rtas_flash_cleanup); +MODULE_DESCRIPTION("PPC procfs firmware flash interface"); MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c index 5a2f5ea3b054..fccf96e897f6 100644 --- a/arch/powerpc/kernel/rtas_pci.c +++ b/arch/powerpc/kernel/rtas_pci.c @@ -43,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where) return 0; } -int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val) +int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { int returnval = -1; unsigned long buid, addr; @@ -87,7 +87,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, pdn = pci_get_pdn_by_devfn(bus, devfn); /* Validity of pdn is checked in here */ - ret = rtas_read_config(pdn, where, size, val); + ret = rtas_pci_dn_read_config(pdn, where, size, val); if (*val == EEH_IO_ERROR_VALUE(size) && eeh_dev_check_failure(pdn_to_eeh_dev(pdn))) return PCIBIOS_DEVICE_NOT_FOUND; @@ -95,7 +95,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, return ret; } -int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val) +int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val) { unsigned long buid, addr; int ret; @@ -134,7 +134,7 @@ static int rtas_pci_write_config(struct pci_bus *bus, pdn = pci_get_pdn_by_devfn(bus, devfn); /* Validity of pdn is checked in here. 
*/ - return rtas_write_config(pdn, where, size, val); + return rtas_pci_dn_write_config(pdn, where, size, val); } static struct pci_ops rtas_pci_ops = { @@ -191,10 +191,10 @@ static void python_countermeasures(struct device_node *dev) void __init init_pci_config_tokens(void) { - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); + read_pci_config = rtas_function_token(RTAS_FN_READ_PCI_CONFIG); + write_pci_config = rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG); + ibm_read_pci_config = rtas_function_token(RTAS_FN_IBM_READ_PCI_CONFIG); + ibm_write_pci_config = rtas_function_token(RTAS_FN_IBM_WRITE_PCI_CONFIG); } unsigned long get_phb_buid(struct device_node *phb) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 5270b450bbde..9bba469239fc 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/init.h> @@ -499,18 +500,20 @@ EXPORT_SYMBOL_GPL(rtas_cancel_event_scan); static int __init rtas_event_scan_init(void) { + int err; + if (!machine_is(pseries) && !machine_is(chrp)) return 0; /* No RTAS */ - event_scan = rtas_token("event-scan"); + event_scan = rtas_function_token(RTAS_FN_EVENT_SCAN); if (event_scan == RTAS_UNKNOWN_SERVICE) { printk(KERN_INFO "rtasd: No event-scan on system\n"); return -ENODEV; } - rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); - if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { + err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate); + if (err) { printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); return -ENODEV; } diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index f9af305d9579..3a28795b4ed8 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -5,6 +5,7 @@ */ #include <linux/types.h> #include <linux/of.h> +#include <linux/string_choices.h> #include <asm/secure_boot.h> static struct device_node *get_ppc_fw_sb_node(void) @@ -32,11 +33,13 @@ bool is_ppc_secureboot_enabled(void) if (enabled) goto out; - if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot)) + node = of_find_node_by_path("/"); + if (!of_property_read_u32(node, "ibm,secure-boot", &secureboot)) enabled = (secureboot > 1); + of_node_put(node); out: - pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Secure boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } @@ -54,11 +57,13 @@ bool is_ppc_trustedboot_enabled(void) if (enabled) goto out; - if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot)) + node = of_find_node_by_path("/"); + if (!of_property_read_u32(node, "ibm,trusted-boot", &trustedboot)) enabled = (trustedboot > 0); + of_node_put(node); out: - pr_info("Trusted boot mode %s\n", enabled ? 
"enabled" : "disabled"); + pr_info("Trusted boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index d96fd14bd7c9..fbb7ebd8aa08 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -14,8 +14,9 @@ #include <linux/debugfs.h> #include <asm/asm-prototypes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/security_features.h> +#include <asm/sections.h> #include <asm/setup.h> #include <asm/inst.h> @@ -34,7 +35,7 @@ static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_N bool barrier_nospec_enabled; static bool no_nospec; static bool btb_flush_enabled; -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) +#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64) static bool no_spectrev2; #endif @@ -121,7 +122,7 @@ static __init int security_feature_debugfs_init(void) device_initcall(security_feature_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) +#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64) static int __init handle_nospectre_v2(char *p) { no_spectrev2 = true; @@ -129,9 +130,9 @@ static int __init handle_nospectre_v2(char *p) return 0; } early_param("nospectre_v2", handle_nospectre_v2); -#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_E500 || CONFIG_PPC_BOOK3S_64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 void __init setup_spectre_v2(void) { if (no_spectrev2 || cpu_mitigations_off()) @@ -139,7 +140,7 @@ void __init setup_spectre_v2(void) else btb_flush_enabled = true; } -#endif /* CONFIG_PPC_FSL_BOOK3E */ +#endif /* CONFIG_PPC_E500 */ #ifdef CONFIG_PPC_BOOK3S_64 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) @@ -363,26 +364,27 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * static int ssb_prctl_get(struct task_struct *task) { + /* + * The STF_BARRIER feature is on by default, so if it's off that means + * firmware has explicitly said the CPU is not vulnerable via either + * the hypercall or device tree. + */ + if (!security_ftr_enabled(SEC_FTR_STF_BARRIER)) + return PR_SPEC_NOT_AFFECTED; + + /* + * If the system's CPU has no known barrier (see setup_stf_barrier()) + * then assume that the CPU is not vulnerable. + */ if (stf_enabled_flush_types == STF_BARRIER_NONE) - /* - * We don't have an explicit signal from firmware that we're - * vulnerable or not, we only have certain CPU revisions that - * are known to be vulnerable. - * - * We assume that if we're on another CPU, where the barrier is - * NONE, then we are not vulnerable. - */ return PR_SPEC_NOT_AFFECTED; - else - /* - * If we do have a barrier type then we are vulnerable. The - * barrier is not a global or per-process mitigation, so the - * only value we can report here is PR_SPEC_ENABLE, which - * appears as "vulnerable" in /proc. - */ - return PR_SPEC_ENABLE; - - return -EINVAL; + + /* + * Otherwise the CPU is vulnerable. The barrier is not a global or + * per-process mitigation, so the only value that can be reported here + * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc. 
+ */ + return PR_SPEC_ENABLE; } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c index 6a29777d6a2d..19172a2804f0 100644 --- a/arch/powerpc/kernel/secvar-ops.c +++ b/arch/powerpc/kernel/secvar-ops.c @@ -8,10 +8,16 @@ #include <linux/cache.h> #include <asm/secvar.h> +#include <asm/bug.h> -const struct secvar_operations *secvar_ops __ro_after_init; +const struct secvar_operations *secvar_ops __ro_after_init = NULL; -void set_secvar_ops(const struct secvar_operations *ops) +int set_secvar_ops(const struct secvar_operations *ops) { + if (WARN_ON_ONCE(secvar_ops)) + return -EBUSY; + secvar_ops = ops; + + return 0; } diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index 1ee4640a2641..ec900bce0257 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -21,56 +21,48 @@ static struct kset *secvar_kset; static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - ssize_t rc = 0; - struct device_node *node; - const char *format; - - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } - - rc = of_property_read_string(node, "format", &format); - if (rc) - goto out; - - rc = sprintf(buf, "%s\n", format); + char tmp[32]; + ssize_t len = secvar_ops->format(tmp, sizeof(tmp)); -out: - of_node_put(node); + if (len > 0) + return sysfs_emit(buf, "%s\n", tmp); + else if (len < 0) + pr_err("Error %zd reading format string\n", len); + else + pr_err("Got empty format string from backend\n"); - return rc; + return -EIO; } static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - uint64_t dsize; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error retrieving %s variable size %d\n", kobj->name, - rc); + if (rc != -ENOENT) + pr_err("Error retrieving %s variable size %d\n", kobj->name, rc); return rc; } - return sprintf(buf, "%llu\n", dsize); + return sysfs_emit(buf, "%llu\n", dsize); } static ssize_t data_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { - uint64_t dsize; char *data; + u64 dsize; int rc; rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize); if (rc) { - pr_err("Error getting %s variable size %d\n", kobj->name, rc); + if (rc != -ENOENT) + pr_err("Error getting %s variable size %d\n", kobj->name, rc); return rc; } pr_debug("dsize is %llu\n", dsize); @@ -93,7 +85,7 @@ data_fail: } static ssize_t update_write(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { int rc; @@ -112,11 +104,11 @@ static struct kobj_attribute format_attr = __ATTR_RO(format); static struct kobj_attribute size_attr = __ATTR_RO(size); -static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); +static struct bin_attribute data_attr __ro_after_init = __BIN_ATTR_RO(data, 0); -static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); +static struct bin_attribute update_attr __ro_after_init = __BIN_ATTR_WO(update, 0); -static struct bin_attribute *secvar_bin_attrs[] = { +static const struct bin_attribute *const secvar_bin_attrs[] = { &data_attr, &update_attr, NULL, @@ -133,44 
+125,68 @@ static const struct attribute_group secvar_attr_group = { }; __ATTRIBUTE_GROUPS(secvar_attr); -static struct kobj_type secvar_ktype = { +static const struct kobj_type secvar_ktype = { .sysfs_ops = &kobj_sysfs_ops, .default_groups = secvar_attr_groups, }; -static int update_kobj_size(void) +static __init int update_kobj_size(void) { - struct device_node *node; u64 varsize; - int rc = 0; - - node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend"); - if (!of_device_is_available(node)) { - rc = -ENODEV; - goto out; - } + int rc = secvar_ops->max_size(&varsize); - rc = of_property_read_u64(node, "max-var-size", &varsize); if (rc) - goto out; + return rc; data_attr.size = varsize; update_attr.size = varsize; -out: - of_node_put(node); + return 0; +} - return rc; +static __init int secvar_sysfs_config(struct kobject *kobj) +{ + struct attribute_group config_group = { + .name = "config", + .attrs = (struct attribute **)secvar_ops->config_attrs, + }; + + if (secvar_ops->config_attrs) + return sysfs_create_group(kobj, &config_group); + + return 0; } -static int secvar_sysfs_load(void) +static __init int add_var(const char *name) { - char *name; - uint64_t namesize = 0; struct kobject *kobj; int rc; + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return -ENOMEM; + + kobject_init(kobj, &secvar_ktype); + + rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); + if (rc) { + pr_warn("kobject_add error %d for attribute: %s\n", rc, + name); + kobject_put(kobj); + return rc; + } + + kobject_uevent(kobj, KOBJ_ADD); + return 0; +} + +static __init int secvar_sysfs_load(void) +{ + u64 namesize = 0; + char *name; + int rc; + name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL); if (!name) return -ENOMEM; @@ -179,73 +195,99 @@ static int secvar_sysfs_load(void) rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE); if (rc) { if (rc != -ENOENT) - pr_err("error getting secvar from firmware %d\n", - rc); - break; - } + pr_err("error getting secvar from firmware %d\n", rc); + else + rc = 0; - kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); - if (!kobj) { - rc = -ENOMEM; break; } - kobject_init(kobj, &secvar_ktype); - - rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name); - if (rc) { - pr_warn("kobject_add error %d for attribute: %s\n", rc, - name); - kobject_put(kobj); - kobj = NULL; - } - - if (kobj) - kobject_uevent(kobj, KOBJ_ADD); - + rc = add_var(name); } while (!rc); kfree(name); return rc; } -static int secvar_sysfs_init(void) +static __init int secvar_sysfs_load_static(void) +{ + const char * const *name_ptr = secvar_ops->var_names; + int rc; + + while (*name_ptr) { + rc = add_var(*name_ptr); + if (rc) + return rc; + name_ptr++; + } + + return 0; +} + +static __init int secvar_sysfs_init(void) { + u64 max_size; int rc; if (!secvar_ops) { - pr_warn("secvar: failed to retrieve secvar operations.\n"); + pr_warn("Failed to retrieve secvar operations\n"); return -ENODEV; } secvar_kobj = kobject_create_and_add("secvar", firmware_kobj); if (!secvar_kobj) { - pr_err("secvar: Failed to create firmware kobj\n"); + pr_err("Failed to create firmware kobj\n"); return -ENOMEM; } rc = sysfs_create_file(secvar_kobj, &format_attr.attr); if (rc) { - kobject_put(secvar_kobj); - return -ENOMEM; + pr_err("Failed to create format object\n"); + rc = -ENOMEM; + goto err; } secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj); if (!secvar_kset) { - pr_err("secvar: sysfs kobject registration failed.\n"); - kobject_put(secvar_kobj); - return -ENOMEM; + pr_err("sysfs kobject registration failed\n"); + 
rc = -ENOMEM; + goto err; } rc = update_kobj_size(); if (rc) { pr_err("Cannot read the size of the attribute\n"); - return rc; + goto err; + } + + rc = secvar_sysfs_config(secvar_kobj); + if (rc) { + pr_err("Failed to create config directory\n"); + goto err; } - secvar_sysfs_load(); + if (secvar_ops->get_next) + rc = secvar_sysfs_load(); + else + rc = secvar_sysfs_load_static(); + + if (rc) { + pr_err("Failed to create variable attributes\n"); + goto err; + } + + // Due to sysfs limitations, we will only ever get a write buffer of + // up to 1 page in size. Print a warning if this is potentially going + // to cause problems, so that the user is aware. + secvar_ops->max_size(&max_size); + if (max_size > PAGE_SIZE) + pr_warn_ratelimited("PAGE_SIZE (%lu) is smaller than maximum object size (%llu), writes are limited to PAGE_SIZE\n", + PAGE_SIZE, max_size); return 0; +err: + kobject_put(secvar_kobj); + return rc; } late_initcall(secvar_sysfs_init); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 1a02629ec70b..68d47c53876c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -18,20 +18,21 @@ #include <linux/delay.h> #include <linux/initrd.h> #include <linux/platform_device.h> +#include <linux/printk.h> #include <linux/seq_file.h> #include <linux/ioport.h> #include <linux/console.h> -#include <linux/screen_info.h> #include <linux/root_dev.h> #include <linux/cpu.h> #include <linux/unistd.h> +#include <linux/seq_buf.h> #include <linux/serial.h> #include <linux/serial_8250.h> #include <linux/percpu.h> #include <linux/memblock.h> -#include <linux/of_irq.h> +#include <linux/of.h> #include <linux/of_fdt.h> -#include <linux/of_platform.h> +#include <linux/of_irq.h> #include <linux/hugetlb.h> #include <linux/pgtable.h> #include <asm/io.h> @@ -57,6 +58,7 @@ #include <asm/xmon.h> #include <asm/cputhreads.h> #include <mm/mmu_decl.h> +#include <asm/archrandom.h> #include <asm/fadump.h> #include <asm/udbg.h> #include <asm/hugetlb.h> @@ -65,6 +67,7 @@ #include <asm/cpu_has_feature.h> #include <asm/kasan.h> #include <asm/mce.h> +#include <asm/systemcfg.h> #include "setup.h" @@ -83,6 +86,11 @@ EXPORT_SYMBOL(machine_id); int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); +int __initdata boot_core_hwid = -1; + +#ifdef CONFIG_PPC64 +int boot_cpu_hwid = -1; +#endif /* * These are used in binfmt_elf.c to put aux entries on the stack @@ -91,21 +99,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid); int dcache_bsize; int icache_bsize; -/* - * This still seems to be needed... -- paulus - */ -struct screen_info screen_info = { - .orig_x = 0, - .orig_y = 25, - .orig_video_cols = 80, - .orig_video_lines = 25, - .orig_video_isVGA = 1, - .orig_video_points = 16 -}; -#if defined(CONFIG_FB_VGA16_MODULE) -EXPORT_SYMBOL(screen_info); -#endif - /* Variables required to store legacy IO irq routing */ int of_i8042_kbd_irq; EXPORT_SYMBOL_GPL(of_i8042_kbd_irq); @@ -118,7 +111,7 @@ int ppc_do_canonicalize_irqs; EXPORT_SYMBOL(ppc_do_canonicalize_irqs); #endif -#ifdef CONFIG_CRASH_CORE +#ifdef CONFIG_CRASH_DUMP /* This keeps a track of which one is the crashing cpu. 
*/ int crashing_cpu = -1; #endif @@ -171,6 +164,14 @@ EXPORT_SYMBOL_GPL(machine_power_off); void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); +size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs) +{ + if (max_longs && ppc_md.get_random_seed && ppc_md.get_random_seed(v)) + return 1; + return 0; +} +EXPORT_SYMBOL(arch_get_random_seed_longs); + void machine_halt(void) { machine_shutdown(); @@ -405,13 +406,32 @@ static void __init cpu_init_thread_core_maps(int tpc) cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", - tpc, tpc > 1 ? "s" : ""); + tpc, str_plural(tpc)); printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift); } u32 *cpu_to_phys_id = NULL; +static int assign_threads(unsigned int cpu, unsigned int nthreads, bool present, + const __be32 *hw_ids) +{ + for (int i = 0; i < nthreads && cpu < nr_cpu_ids; i++) { + __be32 hwid; + + hwid = be32_to_cpu(hw_ids[i]); + + DBG(" thread %d -> cpu %d (hard id %d)\n", i, cpu, hwid); + + set_cpu_present(cpu, present); + set_cpu_possible(cpu, true); + cpu_to_phys_id[cpu] = hwid; + cpu++; + } + + return cpu; +} + /** * setup_cpu_maps - initialize the following cpu maps: * cpu_possible_mask @@ -438,16 +458,13 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32), + cpu_to_phys_id = memblock_alloc_or_panic(nr_cpu_ids * sizeof(u32), __alignof__(u32)); - if (!cpu_to_phys_id) - panic("%s: Failed to allocate %zu bytes align=0x%zx\n", - __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32)); for_each_node_by_type(dn, "cpu") { const __be32 *intserv; __be32 cpu_be; - int j, len; + int len; DBG(" * %pOF...\n", dn); @@ -469,27 +486,31 @@ void __init smp_setup_cpu_maps(void) nthreads = len / sizeof(int); - for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { - bool avail; - - DBG(" thread %d -> cpu %d (hard id %d)\n", - j, cpu, be32_to_cpu(intserv[j])); + bool avail = of_device_is_available(dn); + if (!avail) + avail = !of_property_match_string(dn, + "enable-method", "spin-table"); - avail = of_device_is_available(dn); - if (!avail) - avail = !of_property_match_string(dn, - "enable-method", "spin-table"); - - set_cpu_present(cpu, avail); - set_cpu_possible(cpu, true); - cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]); - cpu++; - } + if (boot_core_hwid >= 0) { + if (cpu == 0) { + pr_info("Skipping CPU node %pOF to allow for boot core.\n", dn); + cpu = nthreads; + continue; + } - if (cpu >= nr_cpu_ids) { + if (be32_to_cpu(intserv[0]) == boot_core_hwid) { + pr_info("Renumbered boot core %pOF to logical 0\n", dn); + assign_threads(0, nthreads, avail, intserv); + of_node_put(dn); + break; + } + } else if (cpu >= nr_cpu_ids) { of_node_put(dn); break; } + + if (cpu < nr_cpu_ids) + cpu = assign_threads(cpu, nthreads, avail, intserv); } /* If no SMT supported, nthreads is forced to 1 */ @@ -537,7 +558,9 @@ void __init smp_setup_cpu_maps(void) out: of_node_put(dn); } - vdso_data->processorCount = num_present_cpus(); +#endif +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount = num_present_cpus(); #endif /* CONFIG_PPC64 */ /* Initialize CPU <=> thread mapping/ @@ -580,6 +603,14 @@ static __init int add_pcspkr(void) device_initcall(add_pcspkr); #endif /* CONFIG_PCSPKR_PLATFORM */ +static char ppc_hw_desc_buf[128] __initdata; + +struct seq_buf ppc_hw_desc __initdata = { + .buffer = ppc_hw_desc_buf, + .size = sizeof(ppc_hw_desc_buf), + .len = 0, +}; + static __init void probe_machine(void) { 
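	/*
	 * ppc_hw_desc above is a seq_buf over ppc_hw_desc_buf[]: earlier boot
	 * code can contribute to the hardware description with, for example
	 * (a hypothetical caller, shown for illustration only):
	 *
	 *	seq_buf_printf(&ppc_hw_desc, "%s ", some_identifier);
	 *
	 * probe_machine() appends the matched machine name below and then
	 * publishes the combined string via dump_stack_set_arch_desc().
	 */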
extern struct machdep_calls __machine_desc_start; @@ -606,13 +637,16 @@ static __init void probe_machine(void) for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) { - DBG(" %s ...", machine_id->name); + DBG(" %s ...\n", machine_id->name); + if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible)) + continue; + if (machine_id->compatibles && !of_machine_compatible_match(machine_id->compatibles)) + continue; memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); - if (ppc_md.probe()) { - DBG(" match !\n"); - break; - } - DBG("\n"); + if (ppc_md.probe && !ppc_md.probe()) + continue; + DBG(" %s match !\n", machine_id->name); + break; } /* What can we do if we didn't find ? */ if (machine_id >= &__machine_desc_end) { @@ -620,7 +654,13 @@ static __init void probe_machine(void) for (;;); } - printk(KERN_INFO "Using %s machine description\n", ppc_md.name); + // Append the machine name to other info we've gathered + seq_buf_puts(&ppc_hw_desc, ppc_md.name); + + // Set the generic hardware description shown in oopses + dump_stack_set_arch_desc(ppc_hw_desc.buffer); + + pr_info("Hardware name: %s\n", ppc_hw_desc.buffer); } /* Match a class of boards, not a specific device configuration. */ @@ -791,8 +831,8 @@ static int __init check_cache_coherency(void) if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR "kernel coherency:%s != device tree_coherency:%s\n", - KERNEL_COHERENCY ? "on" : "off", - devtree_coherency ? "on" : "off"); + str_on_off(KERNEL_COHERENCY), + str_on_off(devtree_coherency)); BUG(); } @@ -917,6 +957,7 @@ void __init setup_arch(char **cmdline_p) /* Parse memory topology */ mem_topology_setup(); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that @@ -938,8 +979,12 @@ void __init setup_arch(char **cmdline_p) klp_init_thread_info(&init_task); setup_initial_init_mm(_stext, _etext, _edata, _end); - + /* sched_init() does the mmgrab(&init_mm) for the primary CPU */ + VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm))); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm)); + inc_mm_active_cpus(&init_mm); mm_iommu_init(&init_mm); + irqstack_early_init(); exc_lvl_early_init(); emergency_stack_init(); @@ -950,9 +995,11 @@ void __init setup_arch(char **cmdline_p) initmem_init(); /* - * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must - * be called after initmem_init(), so that pageblock_order is initialised. + * Reserve large chunks of memory for use by CMA for fadump, KVM and + * hugetlb. These must be called after initmem_init(), so that + * pageblock_order is initialised. 
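+ * (Illustrative reasoning, assuming current CMA behaviour: the CMA core
+ * checks reservations against CMA_MIN_ALIGNMENT_BYTES, which is derived
+ * from pageblock_order, so calling cma_declare_contiguous() before
+ * initmem_init() would test against a stale order.)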
*/ + fadump_cma_init(); kvm_cma_reserve(); gigantic_hugetlb_cma_reserve(); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 93f22da12abe..385a00a2e2ca 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -23,13 +23,13 @@ void check_smt_enabled(void); static inline void check_smt_enabled(void) { } #endif -#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP) void setup_tlb_core_data(void); #else static inline void setup_tlb_core_data(void) { } #endif -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void exc_lvl_early_init(void); #else static inline void exc_lvl_early_init(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 813261789303..5a1bf501fbe1 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,7 +40,7 @@ #include <asm/time.h> #include <asm/serial.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/kdump.h> @@ -140,13 +140,7 @@ arch_initcall(ppc_init); static void *__init alloc_stack(void) { - void *ptr = memblock_alloc(THREAD_SIZE, THREAD_ALIGN); - - if (!ptr) - panic("cannot allocate %d bytes for stack at %pS\n", - THREAD_SIZE, (void *)_RET_IP_); - - return ptr; + return memblock_alloc_or_panic(THREAD_SIZE, THREAD_ALIGN); } void __init irqstack_early_init(void) @@ -176,7 +170,7 @@ void __init emergency_stack_init(void) } #endif -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; @@ -207,7 +201,7 @@ void __init setup_power_save(void) ppc_md.power_save = ppc6xx_idle; #endif -#ifdef CONFIG_E500 +#ifdef CONFIG_PPC_E500 if (cpu_has_feature(CPU_FTR_CAN_DOZE) || cpu_has_feature(CPU_FTR_CAN_NAP)) ppc_md.power_save = e500_idle; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 5761f08dae95..8fd7cbf3bd04 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -34,6 +34,7 @@ #include <linux/of.h> #include <linux/of_fdt.h> +#include <asm/asm-prototypes.h> #include <asm/kvm_guest.h> #include <asm/io.h> #include <asm/kdump.h> @@ -59,7 +60,7 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/opal.h> #include <asm/cputhreads.h> @@ -86,7 +87,7 @@ struct ppc64_caches ppc64_caches = { }; EXPORT_SYMBOL_GPL(ppc64_caches); -#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP) +#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP) void __init setup_tlb_core_data(void) { int cpu; @@ -113,7 +114,6 @@ void __init setup_tlb_core_data(void) * Should we panic instead? 
*/ WARN_ONCE(smt_enabled_at_boot >= 2 && - !mmu_has_feature(MMU_FTR_USE_TLBRSRV) && book3e_htw_mode != PPC_HTW_E6500, "%s: unsupported MMU configuration\n", __func__); } @@ -141,10 +141,7 @@ void __init check_smt_enabled(void) smt_enabled_at_boot = 0; else { int smt; - int rc; - - rc = kstrtoint(smt_enabled_cmdline, 10, &smt); - if (!rc) + if (!kstrtoint(smt_enabled_cmdline, 10, &smt)) smt_enabled_at_boot = min(threads_per_core, smt); } @@ -177,14 +174,26 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /** Fix up paca fields required for the boot cpu */ -static void __init fixup_boot_paca(void) +static void __init fixup_boot_paca(struct paca_struct *boot_paca) { /* The boot cpu is started */ - get_paca()->cpu_start = 1; + boot_paca->cpu_start = 1; +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * Give the early boot machine check stack somewhere to use, use + * half of the init stack. This is a bit hacky but there should not be + * deep stack usage in early init so shouldn't overflow it or overwrite + * things. + */ + boot_paca->mc_emergency_sp = (void *)&init_thread_union + + (THREAD_SIZE/2); +#endif /* Allow percpu accesses to work until we setup percpu data */ - get_paca()->data_offset = 0; - /* Mark interrupts disabled in PACA */ - irq_soft_mask_set(IRQS_DISABLED); + boot_paca->data_offset = 0; + /* Mark interrupts soft and hard disabled in PACA */ + boot_paca->irq_soft_mask = IRQS_DISABLED; + boot_paca->irq_happened = PACA_IRQ_HARD_DIS; + WARN_ON(mfmsr() & MSR_EE); } static void __init configure_exceptions(void) @@ -351,11 +360,15 @@ void __init early_setup(unsigned long dt_ptr) * what CPU we are on. */ initialise_paca(&boot_paca, 0); - setup_paca(&boot_paca); - fixup_boot_paca(); + fixup_boot_paca(&boot_paca); + WARN_ON(local_paca); + setup_paca(&boot_paca); /* install the paca into registers */ /* -------- printk is now safe to use ------- */ + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV)) + enable_machine_check(); + /* Try new device tree based feature discovery ... */ if (!dt_cpu_ftrs_init(__va(dt_ptr))) /* Otherwise use the old style CPU table */ @@ -369,17 +382,21 @@ void __init early_setup(unsigned long dt_ptr) /* * Do early initialization using the flattened device * tree, such as retrieving the physical memory map or - * calculating/retrieving the hash table size. + * calculating/retrieving the hash table size, discover + * boot_cpuid and boot_cpu_hwid. */ early_init_devtree(__va(dt_ptr)); - /* Now we know the logical id of our boot cpu, setup the paca. */ - if (boot_cpuid != 0) { - /* Poison paca_ptrs[0] again if it's not the boot cpu */ - memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0])); - } - setup_paca(paca_ptrs[boot_cpuid]); - fixup_boot_paca(); + allocate_paca_ptrs(); + allocate_paca(boot_cpuid); + set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid); + fixup_boot_paca(paca_ptrs[boot_cpuid]); + setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */ + // smp_processor_id() now reports boot_cpuid + +#ifdef CONFIG_SMP + task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current) +#endif /* * Configure exception handlers. 
This include setting up trampolines @@ -460,7 +477,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -void panic_smp_self_stop(void) +void __noreturn panic_smp_self_stop(void) { hard_irq_disable(); spin_begin(); @@ -674,13 +691,9 @@ void __init initialize_cache_info(void) */ __init u64 ppc64_bolted_size(void) { -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 /* Freescale BookE bolts the entire linear mapping */ - /* XXX: BookE ppc64_rma_limit setup seems to disagree? */ - if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - return linear_map_top; - /* Other BookE, we assume the first GB is bolted */ - return 1ul << 30; + return linear_map_top; #else /* BookS radix, does not take faults on linear mapping */ if (early_radix_enabled()) @@ -724,7 +737,7 @@ void __init irqstack_early_init(void) } } -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 void __init exc_lvl_early_init(void) { unsigned int i; @@ -814,6 +827,7 @@ static __init int pcpu_cpu_to_node(int cpu) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); void __init setup_per_cpu_areas(void) { @@ -826,7 +840,7 @@ void __init setup_per_cpu_areas(void) /* * BookE and BookS radix are historical values and should be revisited. */ - if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) { atom_size = SZ_1M; } else if (radix_enabled()) { atom_size = PAGE_SIZE; @@ -856,6 +870,7 @@ void __init setup_per_cpu_areas(void) if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); + static_key_enable(&__percpu_first_chunk_is_paged.key); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; @@ -874,7 +889,7 @@ unsigned long memory_block_size_bytes(void) } #endif -#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) +#ifdef CONFIG_PPC_INDIRECT_PIO struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif @@ -902,6 +917,7 @@ static int __init disable_hardlockup_detector(void) hardlockup_detector_disable(); #else if (firmware_has_feature(FW_FEATURE_LPAR)) { + check_kvm_guest(); if (is_kvm_guest()) hardlockup_detector_disable(); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 68a91e553e14..aa17e62f3754 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Common signal handling code for both 32 and 64 bits * * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. */ #include <linux/resume_user_mode.h> diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 618aeccdf691..58ecea1cdc27 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -1,10 +1,7 @@ -/* +/* SPDX-License-Identifier: GPL-2.0-or-later + * * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation * Extracted from signal_32.c and signal_64.c - * - * This file is subject to the terms and conditions of the GNU General - * Public License. See the file README.legal in the main directory of - * this archive for more details. 
 */ #ifndef _POWERPC_ARCH_SIGNAL_H @@ -196,9 +193,6 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, #else /* CONFIG_PPC64 */ -extern long sys_rt_sigreturn(void); -extern long sys_sigreturn(void); - static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 157a7403e3eb..7a718ed32b27 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -43,7 +43,7 @@ #include <asm/tm.h> #include <asm/asm-prototypes.h> #ifdef CONFIG_PPC64 -#include "ppc32.h" +#include <asm/syscalls_32.h> #include <asm/unistd.h> #else #include <asm/ucontext.h> @@ -264,8 +264,9 @@ static void prepare_save_user_regs(int ctx_has_vsx_region) #endif } -static int __unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, int ctx_has_vsx_region) +static __always_inline int +__unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, int ctx_has_vsx_region) { unsigned long msr = regs->msr; @@ -364,8 +365,9 @@ static void prepare_save_tm_user_regs(void) current->thread.ckvrsave = mfspr(SPRN_VRSAVE); } -static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, unsigned long msr) +static __always_inline int +save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) { /* Save both sets of general registers */ unsafe_save_general_regs(&current->thread.ckpt_regs, frame, failed); @@ -444,8 +446,9 @@ failed: #else static void prepare_save_tm_user_regs(void) { } -static int save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, - struct mcontext __user *tm_frame, unsigned long msr) +static __always_inline int +save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame, + struct mcontext __user *tm_frame, unsigned long msr) { return 0; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 472596a109e2..86bb5bb4c143 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -377,9 +377,12 @@ static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out); /* - * Force reload of FP/VEC. - * This has to be done before copying stuff into tsk->thread.fpr/vr - * for the reasons explained in the previous comment. + * Force reload of FP/VEC/VSX so userspace sees any changes. + * Clear these bits from the user process' MSR before copying into the + * thread struct. If we are rescheduled or preempted and another task + * uses FP/VEC/VSX, and this process has the MSR bits set, then the + * context switch code will save the current CPU state into the + * thread_struct - possibly overwriting the data we are updating here.
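+ *
+ * A sketch of the race being closed (hypothetical interleaving):
+ *
+ *	1. sigreturn copies new FP state into tsk->thread.fpr[]
+ *	2. the task is preempted while MSR_FP is still set in regs->msr
+ *	3. the context switch saves the stale live FP registers back
+ *	   over tsk->thread.fpr[]
+ *
+ * Clearing MSR_FP/VEC/VSX below makes step 3 a no-op.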
*/ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX)); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index bcefab484ea6..f59e4b9cc207 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -35,6 +35,7 @@ #include <linux/stackprotector.h> #include <linux/pgtable.h> #include <linux/clockchips.h> +#include <linux/kexec.h> #include <asm/ptrace.h> #include <linux/atomic.h> @@ -46,6 +47,7 @@ #include <asm/smp.h> #include <asm/time.h> #include <asm/machdep.h> +#include <asm/mmu_context.h> #include <asm/cputhreads.h> #include <asm/cputable.h> #include <asm/mpic.h> @@ -55,11 +57,13 @@ #endif #include <asm/vdso.h> #include <asm/debug.h> -#include <asm/kexec.h> #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> #include <asm/kup.h> #include <asm/fadump.h> +#include <asm/systemcfg.h> + +#include <trace/events/ipi.h> #ifdef DEBUG #include <asm/udbg.h> @@ -74,10 +78,10 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; #endif struct task_struct *secondary_current; -bool has_big_cores; -bool coregroup_enabled; -bool thread_group_shares_l2; -bool thread_group_shares_l3; +bool has_big_cores __ro_after_init; +bool coregroup_enabled __ro_after_init; +bool thread_group_shares_l2 __ro_after_init; +bool thread_group_shares_l3 __ro_after_init; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -90,15 +94,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); EXPORT_SYMBOL_GPL(has_big_cores); -enum { -#ifdef CONFIG_SCHED_SMT - smt_idx, -#endif - cache_idx, - mc_idx, - die_idx, -}; - #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 #define THREAD_GROUP_SHARE_L2_L3 2 @@ -289,7 +284,7 @@ void smp_muxed_ipi_set_message(int cpu, int msg) * Order previous accesses before accesses in the IPI handler. 
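 *
 * An illustrative pairing, not the literal call sites: the sender does
 *
 *	smp_mb();
 *	WRITE_ONCE(message[msg], 1);
 *
 * while the receiving CPU in smp_ipi_demux_relaxed() pulls the word with
 * xchg(&info->messages, 0) and re-polls it with READ_ONCE(); the _ONCE
 * accessors only constrain the compiler, it is the smp_mb() (hwsync)
 * that provides the cross-CPU ordering.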
*/ smp_mb(); - message[msg] = 1; + WRITE_ONCE(message[msg], 1); } void smp_muxed_ipi_message_pass(int cpu, int msg) @@ -348,7 +343,7 @@ irqreturn_t smp_ipi_demux_relaxed(void) if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI)) nmi_ipi_action(0, NULL); #endif - } while (info->messages); + } while (READ_ONCE(info->messages)); return IRQ_HANDLED; } @@ -364,12 +359,12 @@ static inline void do_message_pass(int cpu, int msg) #endif } -void smp_send_reschedule(int cpu) +void arch_smp_send_reschedule(int cpu) { if (likely(smp_ops)) do_message_pass(cpu, PPC_MSG_RESCHEDULE); } -EXPORT_SYMBOL_GPL(smp_send_reschedule); +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); void arch_send_call_function_single_ipi(int cpu) { @@ -415,9 +410,9 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) { raw_local_irq_save(*flags); hard_irq_disable(); - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) { raw_local_irq_restore(*flags); - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0); + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); raw_local_irq_save(*flags); hard_irq_disable(); } @@ -425,15 +420,15 @@ noinstr static void nmi_ipi_lock_start(unsigned long *flags) noinstr static void nmi_ipi_lock(void) { - while (arch_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) - spin_until_cond(arch_atomic_read(&__nmi_ipi_lock) == 0); + while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) + spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0); } noinstr static void nmi_ipi_unlock(void) { smp_mb(); - WARN_ON(arch_atomic_read(&__nmi_ipi_lock) != 1); - arch_atomic_set(&__nmi_ipi_lock, 0); + WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1); + raw_atomic_set(&__nmi_ipi_lock, 0); } noinstr static void nmi_ipi_unlock_end(unsigned long *flags) @@ -594,7 +589,7 @@ void smp_send_debugger_break(void) } #endif -#ifdef CONFIG_KEXEC_CORE +#ifdef CONFIG_CRASH_DUMP void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) { int cpu; @@ -619,20 +614,6 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif -#ifdef CONFIG_NMI_IPI -static void crash_stop_this_cpu(struct pt_regs *regs) -#else -static void crash_stop_this_cpu(void *dummy) -#endif -{ - /* - * Just busy wait here and avoid marking CPU as offline to ensure - * register data is captured appropriately. 
- */ - while (1) - cpu_relax(); -} - void crash_smp_send_stop(void) { static bool stopped = false; @@ -651,11 +632,14 @@ void crash_smp_send_stop(void) stopped = true; -#ifdef CONFIG_NMI_IPI - smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); -#else - smp_call_function(crash_stop_this_cpu, NULL, 0); -#endif /* CONFIG_NMI_IPI */ +#ifdef CONFIG_CRASH_DUMP + if (kexec_crash_image) { + crash_kexec_prepare(); + return; + } +#endif + + smp_send_stop(); } #ifdef CONFIG_NMI_IPI @@ -719,7 +703,7 @@ static struct task_struct *current_set[NR_CPUS]; static void smp_store_cpu_info(int id) { per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR); -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 per_cpu(next_tlbcam_idx, id) = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; #endif @@ -995,13 +979,13 @@ static int __init init_thread_group_cache_map(int cpu, int cache_property) return 0; } -static bool shared_caches; +static bool shared_caches __ro_after_init; #ifdef CONFIG_SCHED_SMT /* cpumask of CPUs with asymmetric SMT dependency */ static int powerpc_smt_flags(void) { - int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES; + int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC; if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); @@ -1012,6 +996,13 @@ static int powerpc_smt_flags(void) #endif /* + * On shared processor LPARs running on big cores (which have two or more + * independent thread groups per core), prefer lower numbered CPUs so that + * the workload consolidates onto fewer cores. + */ +static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack); + +/* + * P9 has a slightly odd architecture where pairs of cores share an L2 cache. + * This topology makes it *much* cheaper to migrate tasks between adjacent cores + * since the migrated task remains cache hot.
We want to take advantage of this @@ -1019,7 +1010,18 @@ static int powerpc_smt_flags(void) */ static int powerpc_shared_cache_flags(void) { - return SD_SHARE_PKG_RESOURCES; + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_SHARE_LLC | SD_ASYM_PACKING; + + return SD_SHARE_LLC; +} + +static int powerpc_shared_proc_flags(void) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return SD_ASYM_PACKING; + + return 0; } /* @@ -1045,6 +1047,10 @@ static struct cpumask *cpu_coregroup_mask(int cpu) static bool has_coregroup_support(void) { + /* Coregroup identification not available on shared systems */ + if (is_shared_processor()) + return false; + return coregroup_enabled; } @@ -1053,16 +1059,6 @@ static const struct cpumask *cpu_mc_mask(int cpu) { return cpu_coregroup_mask(cpu); } -static struct sched_domain_topology_level powerpc_topology[] = { -#ifdef CONFIG_SCHED_SMT - { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) }, -#endif - { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) }, - { cpu_mc_mask, SD_INIT_NAME(MC) }, - { cpu_cpu_mask, SD_INIT_NAME(DIE) }, - { NULL, }, -}; - static int __init init_big_cores(void) { int cpu; @@ -1096,7 +1092,7 @@ static int __init init_big_cores(void) void __init smp_prepare_cpus(unsigned int max_cpus) { - unsigned int cpu; + unsigned int cpu, num_threads; DBG("smp_prepare_cpus\n"); @@ -1163,9 +1159,15 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (smp_ops && smp_ops->probe) smp_ops->probe(); + + // Initialise the generic SMT topology support + num_threads = 1; + if (smt_enabled_at_boot) + num_threads = smt_enabled_at_boot; + cpu_smt_set_num_threads(num_threads, threads_per_core); } -void smp_prepare_boot_cpu(void) +void __init smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); #ifdef CONFIG_PPC64 @@ -1185,8 +1187,8 @@ int generic_cpu_disable(void) return -EBUSY; set_cpu_online(cpu, false); -#ifdef CONFIG_PPC64 - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; #endif /* Update affinity of all IRQs previously aimed at this CPU */ irq_migrate_all_off_this_cpu(); @@ -1260,7 +1262,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_PPC64 paca_ptrs[cpu]->__current = idle; paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) + - THREAD_SIZE - STACK_FRAME_OVERHEAD; + THREAD_SIZE - STACK_FRAME_MIN_SIZE; #endif task_thread_info(idle)->cpu = cpu; secondary_current = current_set[cpu] = idle; @@ -1268,7 +1270,12 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - int rc, c; + const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC; + const bool booting = system_state < SYSTEM_RUNNING; + const unsigned long hp_spin_ms = 1; + unsigned long deadline; + int rc; + const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms; /* * Don't allow secondary threads to come online if inhibited @@ -1313,22 +1320,23 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) } /* - * wait to see if the cpu made a callin (is actually up). - * use this value that I found through experimentation. - * -- Cort + * At boot time, simply spin on the callin word until the + * deadline passes. + * + * At run time, spin for an optimistic amount of time to avoid + * sleeping in the common case.
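+ *
+ * With the constants below, and assuming they are left at their
+ * defaults, that works out to: up to 5 seconds of spinning at boot,
+ * and for hotplug a 1ms spin followed by up to 100 seconds of 10ms
+ * fsleep() polls before the CPU is declared stuck.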
*/ - if (system_state < SYSTEM_RUNNING) - for (c = 50000; c && !cpu_callin_map[cpu]; c--) - udelay(100); -#ifdef CONFIG_HOTPLUG_CPU - else - /* - * CPUs can take much longer to come up in the - * hotplug case. Wait five seconds. - */ - for (c = 5000; c && !cpu_callin_map[cpu]; c--) - msleep(1); -#endif + deadline = jiffies + msecs_to_jiffies(spin_wait_ms); + spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline)); + + if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) { + const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC; + const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC; + + deadline = jiffies + msecs_to_jiffies(sleep_wait_ms); + while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline)) + fsleep(sleep_interval_us); + } if (!cpu_callin_map[cpu]) { printk(KERN_ERR "Processor %u is stuck.\n", cpu); @@ -1560,7 +1568,7 @@ static void add_cpu_to_masks(int cpu) /* * This CPU will not be in the online mask yet so we need to manually - * add it to it's own thread sibling mask. + * add it to its own thread sibling mask. */ map_cpu_to_node(cpu, cpu_to_node(cpu)); cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); @@ -1591,7 +1599,7 @@ static void add_cpu_to_masks(int cpu) /* Skip all CPUs already part of current CPU core mask */ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu)); - /* If chip_id is -1; limit the cpu_core_mask to within DIE*/ + /* If chip_id is -1; limit the cpu_core_mask to within PKG */ if (chip_id == -1) cpumask_and(mask, mask, cpu_cpu_mask(cpu)); @@ -1608,6 +1616,7 @@ static void add_cpu_to_masks(int cpu) } /* Activate a secondary processor. */ +__no_stack_protector void start_secondary(void *unused) { unsigned int cpu = raw_smp_processor_id(); @@ -1616,12 +1625,15 @@ void start_secondary(void *unused) if (IS_ENABLED(CONFIG_PPC32)) setup_kup(); - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; + VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm))); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); + inc_mm_active_cpus(&init_mm); smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); - rcu_cpu_starting(cpu); + rcutree_report_cpu_starting(cpu); cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) @@ -1631,10 +1643,12 @@ void start_secondary(void *unused) secondary_cpu_time_init(); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG if (system_state == SYSTEM_RUNNING) - vdso_data->processorCount++; + systemcfg->processorCount++; +#endif +#ifdef CONFIG_PPC64 vdso_getcpu_init(); #endif set_numa_node(numa_cpu_lookup_table[cpu]); @@ -1674,50 +1688,40 @@ void start_secondary(void *unused) BUG(); } -#ifdef CONFIG_PROFILING -int setup_profiling_timer(unsigned int multiplier) -{ - return 0; -} -#endif +static struct sched_domain_topology_level powerpc_topology[6]; -static void __init fixup_topology(void) +static void __init build_sched_topology(void) { - int i; + int i = 0; + + if (is_shared_processor() && has_big_cores) + static_branch_enable(&splpar_asym_pack); #ifdef CONFIG_SCHED_SMT if (has_big_cores) { pr_info("Big cores detected but using small core scheduling\n"); - powerpc_topology[smt_idx].mask = smallcore_smt_mask; + powerpc_topology[i++] = + SDTL_INIT(smallcore_smt_mask, powerpc_smt_flags, SMT); + } else { + powerpc_topology[i++] = SDTL_INIT(cpu_smt_mask, powerpc_smt_flags, SMT); } #endif + if (shared_caches) { + powerpc_topology[i++] = + SDTL_INIT(shared_cache_mask, powerpc_shared_cache_flags, CACHE); + } - if (!has_coregroup_support()) - powerpc_topology[mc_idx].mask = 
powerpc_topology[cache_idx].mask; - - /* - * Try to consolidate topology levels here instead of - * allowing scheduler to degenerate. - * - Dont consolidate if masks are different. - * - Dont consolidate if sd_flags exists and are different. - */ - for (i = 1; i <= die_idx; i++) { - if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask) - continue; + if (has_coregroup_support()) { + powerpc_topology[i++] = + SDTL_INIT(cpu_mc_mask, powerpc_shared_proc_flags, MC); + } - if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags && - powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags) - continue; + powerpc_topology[i++] = SDTL_INIT(cpu_cpu_mask, powerpc_shared_proc_flags, PKG); - if (!powerpc_topology[i - 1].sd_flags) - powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags; + /* There must be one trailing NULL entry left. */ + BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1); - powerpc_topology[i].mask = powerpc_topology[i + 1].mask; - powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags; -#ifdef CONFIG_SCHED_DEBUG - powerpc_topology[i].name = powerpc_topology[i + 1].name; -#endif - } + set_sched_topology(powerpc_topology); } void __init smp_cpus_done(unsigned int max_cpus) @@ -1732,9 +1736,20 @@ void __init smp_cpus_done(unsigned int max_cpus) smp_ops->bringup_done(); dump_numa_cpu_topology(); + build_sched_topology(); +} - fixup_topology(); - set_sched_topology(powerpc_topology); +/* + * For asym packing, by default lower numbered CPU has higher priority. + * On shared processors, pack to lower numbered core. However avoid moving + * between thread_groups within the same core. + */ +int arch_asym_cpu_priority(int cpu) +{ + if (static_branch_unlikely(&splpar_asym_pack)) + return -cpu / threads_per_core; + + return -cpu; } #ifdef CONFIG_HOTPLUG_CPU @@ -1760,11 +1775,19 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { + /* + * This could perhaps be a generic call in idle_task_dead(), but + * that requires testing from all archs, so first put it here to + * keep the change contained to the arch code. + */ + VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm))); + dec_mm_active_cpus(&init_mm); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + if (smp_ops->cpu_die) smp_ops->cpu_die(cpu); } -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void) { /* * Disable on the down path. This will be re-enabled by diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index a2443d61728e..90882b5175cd 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -21,6 +21,7 @@ #include <asm/processor.h> #include <linux/ftrace.h> #include <asm/kprobes.h> +#include <linux/rethook.h> #include <asm/paca.h> @@ -43,7 +44,7 @@ void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, unsigned long *stack = (unsigned long *) sp; unsigned long newsp, ip; - if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD)) + if (!validate_sp(sp, task)) return; newsp = stack[0]; @@ -73,29 +74,12 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum bool firstframe; stack_end = stack_page + THREAD_SIZE; - if (!is_idle_task(task)) { - /* - * For user tasks, this is the SP value loaded on - * kernel entry, see "PACAKSAVE(r13)" in _switch() and - * system_call_common()/EXCEPTION_PROLOG_COMMON(). - * - * Likewise for non-swapper kernel threads, - * this also happens to be the top of the stack - * as setup by copy_thread().
- * - * Note that stack backlinks are not properly setup by - * copy_thread() and thus, a forked task() will have - * an unreliable stack trace until it's been - * _switch()'ed to for the first time. - */ - stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - } else { - /* - * idle tasks have a custom stack layout, - * c.f. cpu_idle_thread_init(). - */ - stack_end -= STACK_FRAME_OVERHEAD; - } + + // See copy_thread() for details. + if (task->flags & PF_KTHREAD) + stack_end -= STACK_FRAME_MIN_SIZE; + else + stack_end -= STACK_USER_INT_FRAME_SIZE; if (task == current) sp = current_stack_frame(); @@ -136,7 +120,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum /* Mark stacktraces with exception frames as unreliable. */ if (sp <= stack_end - STACK_INT_FRAME_SIZE && - stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) { return -EINVAL; } @@ -150,12 +134,13 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * arch-dependent code, they are generic. */ ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack); -#ifdef CONFIG_KPROBES + /* * Mark stacktraces with kretprobed functions on them * as unreliable. */ - if (ip == (unsigned long)__kretprobe_trampoline) +#ifdef CONFIG_RETHOOK + if (ip == (unsigned long)arch_rethook_trampoline) return -EINVAL; #endif @@ -221,8 +206,8 @@ static void raise_backtrace_ipi(cpumask_t *mask) } } -void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu) { - nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); + nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi); } #endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */ diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c index 863a7aa24650..ec3101f95e53 100644 --- a/arch/powerpc/kernel/static_call.c +++ b/arch/powerpc/kernel/static_call.c @@ -2,32 +2,60 @@ #include <linux/memory.h> #include <linux/static_call.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { int err; bool is_ret0 = (func == __static_call_return0); - unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func); - bool is_short = is_offset_in_branch_range((long)target - (long)tramp); - - if (!tramp) - return; + unsigned long _tramp = (unsigned long)tramp; + unsigned long _func = (unsigned long)func; + unsigned long _ret0 = _tramp + PPC_SCT_RET0; + bool is_short = is_offset_in_branch_range((long)func - (long)(site ? 
: tramp)); mutex_lock(&text_mutex); - if (func && !is_short) { - err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target)); - if (err) - goto out; + if (site && tail) { + if (!func) + err = patch_instruction(site, ppc_inst(PPC_RAW_BLR())); + else if (is_ret0) + err = patch_branch(site, _ret0, 0); + else if (is_short) + err = patch_branch(site, _func, 0); + else if (tramp) + err = patch_branch(site, _tramp, 0); + else + err = 0; + } else if (site) { + if (!func) + err = patch_instruction(site, ppc_inst(PPC_RAW_NOP())); + else if (is_ret0) + err = patch_instruction(site, ppc_inst(PPC_RAW_LI(_R3, 0))); + else if (is_short) + err = patch_branch(site, _func, BRANCH_SET_LINK); + else if (tramp) + err = patch_branch(site, _tramp, BRANCH_SET_LINK); + else + err = 0; + } else if (tramp) { + if (func && !is_short) { + err = patch_ulong(tramp + PPC_SCT_DATA, _func); + if (err) + goto out; + } + + if (!func) + err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR())); + else if (is_ret0) + err = patch_branch(tramp, _ret0, 0); + else if (is_short) + err = patch_branch(tramp, _func, 0); + else + err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP())); + } else { + err = 0; } - if (!func) - err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR())); - else if (is_short) - err = patch_branch(tramp, target, 0); - else - err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP())); out: mutex_unlock(&text_mutex); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/switch.S index 01ace4c56104..59e3ee99db0e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/switch.S @@ -1,55 +1,21 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * PowerPC version - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP - * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> - * Adapted for Power Macintosh by Paul Mackerras. - * Low-level exception handlers and MMU support - * rewritten by Paul Mackerras. - * Copyright (C) 1996 Paul Mackerras. - * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). - * - * This file contains the system call entry code, context switch - * code, and exception/interrupt return code for PowerPC. - */ - -#include <linux/errno.h> -#include <linux/err.h> -#include <asm/cache.h> -#include <asm/unistd.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/thread_info.h> +#include <linux/objtool.h> +#include <asm/asm-offsets.h> #include <asm/code-patching-asm.h> +#include <asm/mmu.h> #include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/cputable.h> -#include <asm/firmware.h> -#include <asm/bug.h> -#include <asm/ptrace.h> -#include <asm/irqflags.h> -#include <asm/hw_irq.h> -#include <asm/context_tracking.h> -#include <asm/ppc-opcode.h> -#include <asm/barrier.h> -#include <asm/export.h> -#include <asm/asm-compat.h> -#ifdef CONFIG_PPC_BOOK3S -#include <asm/exception-64s.h> -#else -#include <asm/exception-64e.h> -#endif -#include <asm/feature-fixups.h> #include <asm/kup.h> +#include <asm/thread_info.h> -/* - * System calls. 
- */ .section ".text" +.section ".text","ax",@progbits #ifdef CONFIG_PPC_BOOK3S_64 +/* + * Cancel all explicit user streams as they will have no use after context + * switch and will stop the HW from creating streams itself + */ +#define STOP_STREAMS \ + DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6) #define FLUSH_COUNT_CACHE \ 1: nop; \ @@ -103,87 +69,13 @@ flush_branch_caches: .endr blr -#else -#define FLUSH_COUNT_CACHE -#endif /* CONFIG_PPC_BOOK3S_64 */ +#ifdef CONFIG_PPC_64S_HASH_MMU +.balign 32 /* - * This routine switches between two different tasks. The process - * state of one is saved on its kernel stack. Then the state - * of the other is restored from its kernel stack. The memory - * management hardware is updated to the second process's state. - * Finally, we can return to the second process, via interrupt_return. - * On entry, r3 points to the THREAD for the current task, r4 - * points to the THREAD for the new task. - * - * Note: there are two ways to get to the "going out" portion - * of this code; either by coming in via the entry (_switch) - * or via "fork" which must set up an environment equivalent - * to the "_switch" path. If you change this you'll have to change - * the fork code also. - * - * The code which creates the new task context is in 'copy_thread' - * in arch/powerpc/kernel/process.c + * New stack pointer in r8, old stack pointer in r1, must not clobber r3 */ - .align 7 -_GLOBAL(_switch) - mflr r0 - std r0,16(r1) - stdu r1,-SWITCH_FRAME_SIZE(r1) - /* r3-r13 are caller saved -- Cort */ - SAVE_NVGPRS(r1) - std r0,_NIP(r1) /* Return to switch caller */ - mfcr r23 - std r23,_CCR(r1) - std r1,KSP(r3) /* Set old stack pointer */ - - kuap_check_amr r9, r10 - - FLUSH_COUNT_CACHE /* Clobbers r9, ctr */ - - /* - * On SMP kernels, care must be taken because a task may be - * scheduled off CPUx and on to CPUy. Memory ordering must be - * considered. - * - * Cacheable stores on CPUx will be visible when the task is - * scheduled on CPUy by virtue of the core scheduler barriers - * (see "Notes on Program-Order guarantees on SMP systems." in - * kernel/sched/core.c). - * - * Uncacheable stores in the case of involuntary preemption must - * be taken care of. The smp_mb__after_spinlock() in __schedule() - * is implemented as hwsync on powerpc, which orders MMIO too. So - * long as there is an hwsync in the context switch path, it will - * be executed on the source CPU after the task has performed - * all MMIO ops on that CPU, and on the destination CPU before the - * task performs any MMIO ops there. - */ - - /* - * The kernel context switch path must contain a spin_lock, - * which contains larx/stcx, which will clear any reservation - * of the task being switched.
- */ -#ifdef CONFIG_PPC_BOOK3S -/* Cancel all explict user streams as they will have no use after context - * switch and will stop the HW from creating streams itself - */ - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6) -#endif - - addi r6,r4,-THREAD /* Convert THREAD to 'current' */ - std r6,PACACURRENT(r13) /* Set new 'current' */ -#if defined(CONFIG_STACKPROTECTOR) - ld r6, TASK_CANARY(r6) - std r6, PACA_CANARY(r13) -#endif - - ld r8,KSP(r4) /* new stack pointer */ -#ifdef CONFIG_PPC_64S_HASH_MMU -BEGIN_MMU_FTR_SECTION - b 2f -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) +pin_stack_slb: BEGIN_FTR_SECTION clrrdi r6,r8,28 /* get its ESID */ clrrdi r9,r1,28 /* get current sp ESID */ @@ -231,15 +123,57 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) slbmte r7,r0 isync -2: +2: blr + .size pin_stack_slb,.-pin_stack_slb #endif /* CONFIG_PPC_64S_HASH_MMU */ - clrrdi r7, r8, THREAD_SHIFT /* base of new stack */ - /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE - because we don't need to leave the 288-byte ABI gap at the - top of the kernel stack. */ +#else +#define STOP_STREAMS +#define FLUSH_COUNT_CACHE +#endif /* CONFIG_PPC_BOOK3S_64 */ + +/* + * do_switch_32/64 have the same calling convention as _switch, i.e., r3,r4 + * are prev and next thread_struct *, and returns prev task_struct * in r3. + + * This switches the stack, current, and does other task switch housekeeping. + */ +.macro do_switch_32 + tophys(r0,r4) + mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */ + lwz r1,KSP(r4) /* Load new stack pointer */ + + /* save the old current 'last' for return value */ + mr r3,r2 + addi r2,r4,-THREAD /* Update current */ +.endm + +.macro do_switch_64 + ld r8,KSP(r4) /* Load new stack pointer */ + + kuap_check_amr r9, r10 + + FLUSH_COUNT_CACHE /* Clobbers r9, ctr */ + + STOP_STREAMS /* Clobbers r6 */ + + addi r3,r3,-THREAD /* old thread -> task_struct for return value */ + addi r6,r4,-THREAD /* new thread -> task_struct */ + std r6,PACACURRENT(r13) /* Set new task_struct to 'current' */ +#if defined(CONFIG_STACKPROTECTOR) + ld r6, TASK_CANARY(r6) + std r6, PACA_CANARY(r13) +#endif + /* Set new PACAKSAVE */ + clrrdi r7,r8,THREAD_SHIFT /* base of new stack */ addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE + std r7,PACAKSAVE(r13) +#ifdef CONFIG_PPC_64S_HASH_MMU +BEGIN_MMU_FTR_SECTION + bl pin_stack_slb +END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) +#endif /* * PMU interrupts in radix may come in here. They will use r1, not * PACAKSAVE, so this stack switch will not cause a problem. They @@ -249,81 +183,75 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * active on the new CPU, will order those stores. */ mr r1,r8 /* start using new stack pointer */ - std r7,PACAKSAVE(r13) +.endm + +/* + * This routine switches between two different tasks. The process + * state of one is saved on its kernel stack. Then the state + * of the other is restored from its kernel stack. The memory + * management hardware is updated to the second process's state. + * Finally, we can return to the second process. + * On entry, r3 points to the THREAD for the current task, r4 + * points to the THREAD for the new task. + * + * This routine is always called with interrupts disabled. + * + * Note: there are two ways to get to the "going out" portion + * of this code; either by coming in via the entry (_switch) + * or via "fork" which must set up an environment equivalent + * to the "_switch" path. If you change this , you'll have to + * change the fork code also. 
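+ *
+ * Expressed as a C prototype, the convention is roughly
+ *
+ *	struct task_struct *_switch(struct thread_struct *prev,
+ *				    struct thread_struct *next);
+ *
+ * with the previous task returned in r3, matching the do_switch_32/64
+ * description above.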
+ * + * The code which creates the new task context is in 'copy_thread' + * in arch/powerpc/kernel/process.c + * + * Note: this uses SWITCH_FRAME_SIZE rather than USER_INT_FRAME_SIZE + * because we don't need to leave the redzone ABI gap at the top of + * the kernel stack. + */ +_GLOBAL(_switch) + PPC_CREATE_STACK_FRAME(SWITCH_FRAME_SIZE) + PPC_STL r1,KSP(r3) /* Set old stack pointer */ + SAVE_NVGPRS(r1) /* volatiles are caller-saved -- Cort */ + PPC_STL r0,_NIP(r1) /* Return to switch caller */ + mfcr r0 + stw r0,_CCR(r1) - ld r6,_CCR(r1) - mtcrf 0xFF,r6 + /* + * On SMP kernels, care must be taken because a task may be + * scheduled off CPUx and on to CPUy. Memory ordering must be + * considered. + * + * Cacheable stores on CPUx will be visible when the task is + * scheduled on CPUy by virtue of the core scheduler barriers + * (see "Notes on Program-Order guarantees on SMP systems." in + * kernel/sched/core.c). + * + * Uncacheable stores in the case of involuntary preemption must + * be taken care of. The smp_mb__after_spinlock() in __schedule() + * is implemented as hwsync on powerpc, which orders MMIO too. So + * long as there is an hwsync in the context switch path, it will + * be executed on the source CPU after the task has performed + * all MMIO ops on that CPU, and on the destination CPU before the + * task performs any MMIO ops there. + */ + + /* + * The kernel context switch path must contain a spin_lock, + * which contains larx/stcx, which will clear any reservation + * of the task being switched. + */ - /* r3-r13 are destroyed -- Cort */ - REST_NVGPRS(r1) +#ifdef CONFIG_PPC32 + do_switch_32 +#else + do_switch_64 +#endif - /* convert old thread to its task_struct for return value */ - addi r3,r3,-THREAD - ld r7,_NIP(r1) /* Return to _switch caller in new task */ - mtlr r7 + lwz r0,_CCR(r1) + mtcrf 0xFF,r0 + REST_NVGPRS(r1) /* volatiles are destroyed -- Cort */ + PPC_LL r0,_NIP(r1) /* Return to _switch caller in new task */ + mtlr r0 addi r1,r1,SWITCH_FRAME_SIZE blr - -_GLOBAL(enter_prom) - mflr r0 - std r0,16(r1) - stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */ - - /* Because PROM is running in 32b mode, it clobbers the high order half - * of all registers that it saves. We therefore save those registers - * PROM might touch to the stack.
(r0, r3-r13 are caller saved) - */ - SAVE_GPR(2, r1) - SAVE_GPR(13, r1) - SAVE_NVGPRS(r1) - mfcr r10 - mfmsr r11 - std r10,_CCR(r1) - std r11,_MSR(r1) - - /* Put PROM address in SRR0 */ - mtsrr0 r4 - - /* Setup our trampoline return addr in LR */ - bcl 20,31,$+4 -0: mflr r4 - addi r4,r4,(1f - 0b) - mtlr r4 - - /* Prepare a 32-bit mode big endian MSR - */ -#ifdef CONFIG_PPC_BOOK3E - rlwinm r11,r11,0,1,31 - mtsrr1 r11 - rfi -#else /* CONFIG_PPC_BOOK3E */ - LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE) - andc r11,r11,r12 - mtsrr1 r11 - RFI_TO_KERNEL -#endif /* CONFIG_PPC_BOOK3E */ - -1: /* Return from OF */ - FIXUP_ENDIAN - - /* Just make sure that r1 top 32 bits didn't get - * corrupt by OF - */ - rldicl r1,r1,0,32 - - /* Restore the MSR (back to 64 bits) */ - ld r0,_MSR(r1) - MTMSRD(r0) - isync - - /* Restore other registers */ - REST_GPR(2, r1) - REST_GPR(13, r1) - REST_NVGPRS(r1) - ld r4,_CCR(r1) - mtcr r4 - - addi r1,r1,SWITCH_FRAME_SIZE - ld r0,16(r1) - mtlr r0 - blr diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index e0cbd63007f2..ffb79326483c 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/threads.h> +#include <linux/linkage.h> + #include <asm/processor.h> #include <asm/page.h> #include <asm/cputable.h> @@ -400,7 +402,7 @@ _ASM_NOKPROBE_SYMBOL(swsusp_arch_resume) /* FIXME:This construct is actually not useful since we don't shut * down the instruction MMU, we could just flip back MSR-DR on. */ -turn_on_mmu: +SYM_FUNC_START_LOCAL(turn_on_mmu) mflr r4 mtsrr0 r4 mtsrr1 r3 @@ -408,4 +410,5 @@ turn_on_mmu: isync rfi _ASM_NOKPROBE_SYMBOL(turn_on_mmu) +SYM_FUNC_END(turn_on_mmu) diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index 16ee3baaf09a..50fa8fc9ef95 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -11,6 +11,8 @@ #include <linux/interrupt.h> #include <linux/nmi.h> +void do_after_copyback(void); + void do_after_copyback(void) { iommu_restore(); diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_85xx.S index 88cfdbd530f1..88cfdbd530f1 100644 --- a/arch/powerpc/kernel/swsusp_booke.S +++ b/arch/powerpc/kernel/swsusp_85xx.S diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 9f1903c7f540..f645652c2654 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -76,16 +76,10 @@ swsusp_save_area: .space SL_SIZE - .section ".toc","aw" -swsusp_save_area_ptr: - .tc swsusp_save_area[TC],swsusp_save_area -restore_pblist_ptr: - .tc restore_pblist[TC],restore_pblist - .section .text .align 5 _GLOBAL(swsusp_arch_suspend) - ld r11,swsusp_save_area_ptr@toc(r2) + LOAD_REG_ADDR(r11, swsusp_save_area) SAVE_SPECIAL(LR) SAVE_REGISTER(r1) SAVE_SPECIAL(CR) @@ -131,7 +125,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) bl swsusp_save /* restore LR */ - ld r11,swsusp_save_area_ptr@toc(r2) + LOAD_REG_ADDR(r11, swsusp_save_area) RESTORE_SPECIAL(LR) addi r1,r1,128 @@ -145,7 +139,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync - ld r12,restore_pblist_ptr@toc(r2) + LOAD_REG_ADDR(r12, restore_pblist) ld r12,0(r12) cmpdi r12,0 @@ -187,7 +181,7 @@ nothing_to_copy: tlbia #endif - ld r11,swsusp_save_area_ptr@toc(r2) + LOAD_REG_ADDR(r11, swsusp_save_area) RESTORE_SPECIAL(CR) @@ -265,7 +259,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) bl do_after_copyback addi r1,r1,128 - ld r11,swsusp_save_area_ptr@toc(r2) +
LOAD_REG_ADDR(r11, swsusp_save_area) RESTORE_SPECIAL(LR) li r3, 0 diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index 16ff0399a257..d451a8229223 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -1,13 +1,23 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls. + * sys_ppc32.c: 32-bit system calls with complex calling conventions. * * Copyright (C) 2001 IBM * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) * - * These routines maintain argument size conversion between 32bit and 64bit - * environment. + * 32-bit system calls with 64-bit arguments pass those in register pairs. + * This must be specially dealt with on 64-bit kernels. The compat_arg_u64_dual + * in generic compat syscalls is not always usable because the register + * pairing is constrained depending on preceding arguments. + * + * An analogous problem exists on 32-bit kernels with ARCH_HAS_SYSCALL_WRAPPER, + * the defined system call functions take the pt_regs as an argument, and there + * is a mapping macro which maps registers to arguments + * (SC_POWERPC_REGS_TO_ARGS) which also does not deal with these 64-bit + * arguments. + * + * This file contains these system calls. */ #include <linux/kernel.h> @@ -25,7 +35,6 @@ #include <linux/poll.h> #include <linux/personality.h> #include <linux/stat.h> -#include <linux/mman.h> #include <linux/in.h> #include <linux/syscalls.h> #include <linux/unistd.h> @@ -48,75 +57,79 @@ #include <asm/syscalls.h> #include <asm/switch_to.h> -unsigned long compat_sys_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - /* This should remain 12 even if PAGE_SIZE changes */ - return sys_mmap(addr, len, prot, flags, fd, pgoff << 12); -} - -/* - * long long munging: - * The 32 bit ABI passes long longs in an odd even register pair. - * High and low parts are swapped depending on endian mode, - * so define a macro (similar to mips linux32) to handle that. 
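 *
 * A worked example (illustrative): for a 32-bit pread64(fd, buf, count,
 * pos), the ABI places the 64-bit pos in the aligned register pair
 * r7/r8 and skips r6 as padding - hence the dummy reg6 argument in the
 * pread64 handler below, which rebuilds the offset as
 *
 *	loff_t pos = merge_64(pos1, pos2);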
- */ -#ifdef __LITTLE_ENDIAN__ -#define merge_64(low, high) ((u64)high << 32) | low +#ifdef CONFIG_PPC32 +#define PPC32_SYSCALL_DEFINE4 SYSCALL_DEFINE4 +#define PPC32_SYSCALL_DEFINE5 SYSCALL_DEFINE5 +#define PPC32_SYSCALL_DEFINE6 SYSCALL_DEFINE6 #else -#define merge_64(high, low) ((u64)high << 32) | low +#define PPC32_SYSCALL_DEFINE4 COMPAT_SYSCALL_DEFINE4 +#define PPC32_SYSCALL_DEFINE5 COMPAT_SYSCALL_DEFINE5 +#define PPC32_SYSCALL_DEFINE6 COMPAT_SYSCALL_DEFINE6 #endif -compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, - u32 reg6, u32 pos1, u32 pos2) +PPC32_SYSCALL_DEFINE6(ppc_pread64, + unsigned int, fd, + char __user *, ubuf, compat_size_t, count, + u32, reg6, u32, pos1, u32, pos2) { return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2)); } -compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, - u32 reg6, u32 pos1, u32 pos2) +PPC32_SYSCALL_DEFINE6(ppc_pwrite64, + unsigned int, fd, + const char __user *, ubuf, compat_size_t, count, + u32, reg6, u32, pos1, u32, pos2) { return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2)); } -compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, u32 count) +PPC32_SYSCALL_DEFINE5(ppc_readahead, + int, fd, u32, r4, + u32, offset1, u32, offset2, u32, count) { return ksys_readahead(fd, merge_64(offset1, offset2), count); } -asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4, - unsigned long len1, unsigned long len2) +PPC32_SYSCALL_DEFINE4(ppc_truncate64, + const char __user *, path, u32, reg4, + unsigned long, len1, unsigned long, len2) { return ksys_truncate(path, merge_64(len1, len2)); } -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offset1, u32 offset2, - u32 len1, u32 len2) -{ - return ksys_fallocate(fd, mode, ((loff_t)offset1 << 32) | offset2, - merge_64(len1, len2)); -} - -asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1, - unsigned long len2) +PPC32_SYSCALL_DEFINE4(ppc_ftruncate64, + unsigned int, fd, u32, reg4, + unsigned long, len1, unsigned long, len2) { return ksys_ftruncate(fd, merge_64(len1, len2)); } -long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2, - size_t len, int advice) +PPC32_SYSCALL_DEFINE6(ppc32_fadvise64, + int, fd, u32, unused, u32, offset1, u32, offset2, + size_t, len, int, advice) { return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len, advice); } -asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags, - unsigned offset1, unsigned offset2, - unsigned nbytes1, unsigned nbytes2) +PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2, + int, fd, unsigned int, flags, + unsigned int, offset1, unsigned int, offset2, + unsigned int, nbytes1, unsigned int, nbytes2) { loff_t offset = merge_64(offset1, offset2); loff_t nbytes = merge_64(nbytes1, nbytes2); return ksys_sync_file_range(fd, offset, nbytes, flags); } + +#ifdef CONFIG_PPC32 +SYSCALL_DEFINE6(ppc_fallocate, + int, fd, int, mode, + u32, offset1, u32, offset2, u32, len1, u32, len2) +{ + return ksys_fallocate(fd, mode, + merge_64(offset1, offset2), + merge_64(len1, len2)); +} +#endif diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c new file mode 100644 index 000000000000..be159ad4b77b --- /dev/null +++ b/arch/powerpc/kernel/syscall.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/compat.h> +#include <linux/context_tracking.h> +#include <linux/randomize_kstack.h> + +#include <asm/interrupt.h> +#include 
<asm/kup.h> +#include <asm/syscall.h> +#include <asm/time.h> +#include <asm/tm.h> +#include <asm/unistd.h> + + +/* Has to run notrace because it is entered not completely "reconciled" */ +notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) +{ + long ret; + syscall_fn f; + + kuap_lock(); + + add_random_kstack_offset(); + + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED); + + trace_hardirqs_off(); /* finish reconciling */ + + CT_WARN_ON(ct_state() == CT_STATE_KERNEL); + user_exit_irqoff(); + + BUG_ON(regs_is_unrecoverable(regs)); + BUG_ON(!user_mode(regs)); + BUG_ON(arch_irq_disabled_regs(regs)); + +#ifdef CONFIG_PPC_PKEY + if (mmu_has_feature(MMU_FTR_PKEY)) { + unsigned long amr, iamr; + bool flush_needed = false; + /* + * When entering from userspace we mostly have the AMR/IAMR + * different from kernel default values. Hence don't compare. + */ + amr = mfspr(SPRN_AMR); + iamr = mfspr(SPRN_IAMR); + regs->amr = amr; + regs->iamr = iamr; + if (mmu_has_feature(MMU_FTR_KUAP)) { + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + flush_needed = true; + } + if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { + mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED); + flush_needed = true; + } + if (flush_needed) + isync(); + } else +#endif + kuap_assert_locked(); + + booke_restore_dbcr0(); + + account_cpu_user_entry(); + + account_stolen_time(); + + /* + * This is not required for the syscall exit path, but makes the + * stack frame look nicer. If this was initialised in the first stack + * frame, or if the unwinder was taught the first stack frame always + * returns to user with IRQS_ENABLED, this store could be avoided! + */ + irq_soft_mask_regs_set_state(regs, IRQS_ENABLED); + + /* + * If system call is called with TM active, set _TIF_RESTOREALL to + * prevent RFSCV being used to return to userspace, because POWER9 + * TM implementation has problems with this instruction returning to + * transactional state. Final register values are not relevant because + * the transaction will be aborted upon return anyway. Or in the case + * of unsupported_scv SIGILL fault, the return state does not much + * matter because it's an edge case. + */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) + set_bits(_TIF_RESTOREALL, &current_thread_info()->flags); + + /* + * If the system call was made with a transaction active, doom it and + * return without performing the system call. Unless it was an + * unsupported scv vector, in which case it's treated like an illegal + * instruction. + */ +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) && + !trap_is_unsupported_scv(regs)) { + /* Enable TM in the kernel, and disable EE (for scv) */ + hard_irq_disable(); + mtmsr(mfmsr() | MSR_TM); + + /* tabort, this dooms the transaction, nothing else */ + asm volatile(".long 0x7c00071d | ((%0) << 16)" + :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)); + + /* + * Userspace will never see the return value. Execution will + * resume after the tbegin. of the aborted transaction with the + * checkpointed register state. A context switch could occur + * or signal delivered to the process before resuming the + * doomed transaction context, but that should all be handled + * as expected.
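For context, a hedged sketch of the user-side view (hypothetical code using gcc's HTM builtins, not from the patch): the syscall body never completes inside the transaction, and the thread resumes on the tbegin. failure path with the checkpointed registers:

	if (__builtin_tbegin(0)) {
		getpid();		/* doomed by the tabort above; no return value is seen */
		__builtin_tend(0);	/* never reached */
	} else {
		/*
		 * Resumes here; TEXASR carries TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT,
		 * so a failure handler should not simply retry.
		 */
	}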
+ */ + return -ENOSYS; + } +#endif // CONFIG_PPC_TRANSACTIONAL_MEM + + local_irq_enable(); + + if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + /* + * We use the return value of do_syscall_trace_enter() as the + * syscall number. If the syscall was rejected for any reason + * do_syscall_trace_enter() returns an invalid syscall number + * and the test against NR_syscalls will fail and the return + * value to be used is in regs->gpr[3]. + */ + r0 = do_syscall_trace_enter(regs); + if (unlikely(r0 >= NR_syscalls)) + return regs->gpr[3]; + + } else if (unlikely(r0 >= NR_syscalls)) { + if (unlikely(trap_is_unsupported_scv(regs))) { + /* Unsupported scv vector */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return regs->gpr[3]; + } + return -ENOSYS; + } + + /* May be faster to do array_index_nospec? */ + barrier_nospec(); + +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER + // No COMPAT if we have SYSCALL_WRAPPER, see Kconfig + f = (void *)sys_call_table[r0]; + ret = f(regs); +#else + if (unlikely(is_compat_task())) { + unsigned long r3, r4, r5, r6, r7, r8; + + f = (void *)compat_sys_call_table[r0]; + + r3 = regs->gpr[3] & 0x00000000ffffffffULL; + r4 = regs->gpr[4] & 0x00000000ffffffffULL; + r5 = regs->gpr[5] & 0x00000000ffffffffULL; + r6 = regs->gpr[6] & 0x00000000ffffffffULL; + r7 = regs->gpr[7] & 0x00000000ffffffffULL; + r8 = regs->gpr[8] & 0x00000000ffffffffULL; + + ret = f(r3, r4, r5, r6, r7, r8); + } else { + f = (void *)sys_call_table[r0]; + + ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5], + regs->gpr[6], regs->gpr[7], regs->gpr[8]); + } +#endif + + /* + * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), + * so the maximum stack offset is 1k bytes (10 bits). + * + * The actual entropy will be further reduced by the compiler when + * applying stack alignment constraints: the powerpc architecture + * may have two kinds of stack alignment (16-bytes and 8-bytes). + * + * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3]. 
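For reference, a simplified sketch of the generic helpers this function pairs with (semantics abridged from linux/randomize_kstack.h; the _sketch names are placeholders): the offset banked here at syscall exit is consumed on the next entry by add_random_kstack_offset(), which caps it at 10 bits and carves it out of the stack with alloca():

	static DEFINE_PER_CPU(u32, kstack_offset);

	/* entry side: apply the previously chosen offset (0..1023 bytes) */
	#define add_random_kstack_offset_sketch() do {			\
		u32 off = raw_cpu_read(kstack_offset);			\
		u8 *p = __builtin_alloca(KSTACK_OFFSET_MAX(off));	\
		asm volatile("" :: "r"(p) : "memory"); /* keep the alloca */ \
	} while (0)

	/* exit side: bank fresh entropy, here the timebase from mftb() */
	#define choose_random_kstack_offset_sketch(rand)		\
		raw_cpu_write(kstack_offset, (u32)(rand))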
+ */ + choose_random_kstack_offset(mftb()); + + return ret; +} diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index fc999140bc27..68ebb23a5af4 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -36,9 +36,9 @@ #include <asm/time.h> #include <asm/unistd.h> -static inline long do_mmap2(unsigned long addr, size_t len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off, int shift) +static long do_mmap2(unsigned long addr, size_t len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off, int shift) { if (!arch_validate_prot(prot, addr)) return -EINVAL; @@ -56,6 +56,16 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12); } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE6(mmap2, + unsigned long, addr, size_t, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, off_4k) +{ + return do_mmap2(addr, len, prot, flags, fd, off_4k, PAGE_SHIFT-12); +} +#endif + SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, off_t, offset) @@ -63,43 +73,39 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); } -#ifdef CONFIG_PPC32 -/* - * Due to some executables calling the wrong select we sometimes - * get wrong args. This determines how the args are being passed - * (a single ptr to them all args passed) then calls - * sys_select() with the appropriate args. -- Cort - */ -int -ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct __kernel_old_timeval __user *tvp) -{ - if ((unsigned long)n >= 4096) - return sys_old_select((void __user *)n); - - return sys_select(n, inp, outp, exp, tvp); -} -#endif - #ifdef CONFIG_PPC64 -long ppc64_personality(unsigned long personality) +static long do_ppc64_personality(unsigned long personality) { long ret; if (personality(current->personality) == PER_LINUX32 && personality(personality) == PER_LINUX) personality = (personality & ~PER_MASK) | PER_LINUX32; - ret = sys_personality(personality); + ret = ksys_personality(personality); if (personality(ret) == PER_LINUX32) ret = (ret & ~PER_MASK) | PER_LINUX; return ret; } -#endif -long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, - u32 len_high, u32 len_low) +SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality) +{ + return do_ppc64_personality(personality); +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality) +{ + return do_ppc64_personality(personality); +} +#endif /* CONFIG_COMPAT */ +#endif /* CONFIG_PPC64 */ + +SYSCALL_DEFINE6(ppc_fadvise64_64, + int, fd, int, advice, u32, offset_high, u32, offset_low, + u32, len_high, u32, len_low) { - return ksys_fadvise64_64(fd, (u64)offset_high << 32 | offset_low, - (u64)len_high << 32 | len_low, advice); + return ksys_fadvise64_64(fd, merge_64(offset_high, offset_low), + merge_64(len_high, len_low), advice); } SYSCALL_DEFINE0(switch_endian) diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2600b4237292..b453e80dfc00 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -110,7 +110,7 @@ 79 common settimeofday sys_settimeofday compat_sys_settimeofday 80 common getgroups sys_getgroups 81 common setgroups sys_setgroups -82 32 select 
ppc_select sys_ni_syscall +82 32 select sys_old_select compat_sys_old_select 82 64 select sys_ni_syscall 82 spu select sys_ni_syscall 83 common symlink sys_symlink @@ -178,9 +178,9 @@ 133 common fchdir sys_fchdir 134 common bdflush sys_ni_syscall 135 common sysfs sys_sysfs -136 32 personality sys_personality ppc64_personality -136 64 personality ppc64_personality -136 spu personality ppc64_personality +136 32 personality sys_personality compat_sys_ppc64_personality +136 64 personality sys_ppc64_personality +136 spu personality sys_ppc64_personality 137 common afs_syscall sys_ni_syscall 138 common setfsuid sys_setfsuid 139 common setfsgid sys_setfsgid @@ -228,8 +228,12 @@ 176 64 rt_sigtimedwait sys_rt_sigtimedwait 177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend -179 common pread64 sys_pread64 compat_sys_pread64 -180 common pwrite64 sys_pwrite64 compat_sys_pwrite64 +179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 +179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 +180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 +180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -242,10 +246,12 @@ 188 common putpmsg sys_ni_syscall 189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit -191 common readahead sys_readahead compat_sys_readahead +191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead +191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 -193 32 truncate64 sys_truncate64 compat_sys_truncate64 -194 32 ftruncate64 sys_ftruncate64 compat_sys_ftruncate64 +193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 +194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 195 32 stat64 sys_stat64 196 32 lstat64 sys_lstat64 197 32 fstat64 sys_fstat64 @@ -288,9 +294,11 @@ 230 common io_submit sys_io_submit compat_sys_io_submit 231 common io_cancel sys_io_cancel 232 nospu set_tid_address sys_set_tid_address -233 common fadvise64 sys_fadvise64 ppc32_fadvise64 +233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 +233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group -235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create 237 common epoll_ctl sys_epoll_ctl 238 common epoll_wait sys_epoll_wait @@ -323,7 +331,7 @@ 251 spu utimes sys_utimes 252 common statfs64 sys_statfs64 compat_sys_statfs64 253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 -254 32 fadvise64_64 ppc_fadvise64_64 +254 32 fadvise64_64 sys_ppc_fadvise64_64 254 spu fadvise64_64 sys_ni_syscall 255 common rtas sys_rtas 256 32 sys_debug_setcontext sys_debug_setcontext sys_ni_syscall @@ -390,8 +398,11 @@ 305 common signalfd sys_signalfd compat_sys_signalfd 306 common timerfd_create sys_timerfd_create 307 common eventfd sys_eventfd -308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2 -309 nospu fallocate sys_fallocate compat_sys_fallocate +308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2 +308 64 sync_file_range2 sys_sync_file_range2 +308 spu sync_file_range2 sys_sync_file_range2 +309 32 fallocate sys_ppc_fallocate compat_sys_fallocate +309 64 fallocate sys_fallocate 310 nospu subpage_prot sys_subpage_prot 311 32 timerfd_settime 
sys_timerfd_settime32 311 64 timerfd_settime sys_timerfd_settime @@ -495,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive @@ -530,3 +541,22 @@ 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_ni_syscall +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue +457 common statmount sys_statmount +458 common listmount sys_listmount +459 common lsm_get_self_attr sys_lsm_get_self_attr +460 common lsm_set_self_attr sys_lsm_set_self_attr +461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 3a10cda9c05e..6b3dd6decdf9 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/hvcall.h> #include <asm/machdep.h> #include <asm/smp.h> +#include <asm/time.h> #include <asm/pmc.h> #include <asm/firmware.h> #include <asm/idle.h> @@ -139,7 +140,7 @@ static unsigned long dscr_default; * @val: Returned cpu specific DSCR default value * * This function returns the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void read_dscr(void *val) { @@ -152,7 +153,7 @@ static void read_dscr(void *val) * @val: New cpu specific DSCR default value to update * * This function updates the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. 
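As a usage sketch (illustrative; it mirrors how the per-cpu sysfs store handlers elsewhere in this file drive these helpers): because the default lives in each CPU's PACA, updating one CPU means running write_dscr() below on that CPU:

	unsigned long val = 0x10;	/* hypothetical new DSCR default */
	smp_call_function_single(cpu, write_dscr, &val, 1);	/* wait=1 */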
*/ static void write_dscr(void *val) { @@ -217,18 +218,23 @@ static DEVICE_ATTR(dscr_default, 0600, static void __init sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { + struct device *dev_root; int cpu; dscr_default = spr_default_dscr; for_each_possible_cpu(cpu) paca_ptrs[cpu]->dscr_default = dscr_default; - device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + device_create_file(dev_root, &dev_attr_dscr_default); + put_device(dev_root); + } } } #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 #define MAX_BIT 63 static u64 pw20_wt; @@ -746,7 +752,12 @@ static DEVICE_ATTR(svm, 0444, show_svm, NULL); static void __init create_svm_file(void) { - device_create_file(cpu_subsys.dev_root, &dev_attr_svm); + struct device *dev_root = bus_get_dev_root(&cpu_subsys); + + if (dev_root) { + device_create_file(dev_root, &dev_attr_svm); + put_device(dev_root); + } } #else static void __init create_svm_file(void) @@ -907,7 +918,7 @@ static int register_cpu_online(unsigned int cpu) device_create_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { device_create_file(s, &dev_attr_pw20_state); device_create_file(s, &dev_attr_pw20_wait_time); @@ -1003,7 +1014,7 @@ static int unregister_cpu_online(unsigned int cpu) device_remove_file(s, &dev_attr_tscr); #endif /* CONFIG_PPC64 */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) { device_remove_file(s, &dev_attr_pw20_state); device_remove_file(s, &dev_attr_pw20_wait_time); diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.c index cb3358886203..4305f2a2162f 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.c @@ -10,31 +10,37 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ -#include <asm/ppc_asm.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <asm/unistd.h> +#include <asm/syscalls.h> -.section .rodata,"a" +#undef __SYSCALL_WITH_COMPAT +#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry) -#ifdef CONFIG_PPC64 - .p2align 3 -#define __SYSCALL(nr, entry) .8byte entry +#undef __SYSCALL +#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER +#define __SYSCALL(nr, entry) [nr] = entry, #else -#define __SYSCALL(nr, entry) .long entry +/* + * Coerce syscall handlers with arbitrary parameters to common type + * requires cast to void* to avoid -Wcast-function-type. 
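Illustrative expansion (assumed and abridged, not the literal preprocessor output): once the generated headers are included, the table becomes an ordinary C array of designated initializers:

	const syscall_fn sys_call_table[] = {
		[0] = (void *) sys_restart_syscall,
		[1] = (void *) sys_exit,
		[2] = (void *) sys_fork,
		/* ... generated from syscall.tbl ... */
	};

With ARCH_HAS_SYSCALL_WRAPPER the cast is dropped, since every handler then uniformly takes struct pt_regs *.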
+ */ +#define __SYSCALL(nr, entry) [nr] = (void *) entry, #endif -#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) -.globl sys_call_table -sys_call_table: +const syscall_fn sys_call_table[] = { #ifdef CONFIG_PPC64 #include <asm/syscall_table_64.h> #else #include <asm/syscall_table_32.h> #endif +}; #ifdef CONFIG_COMPAT #undef __SYSCALL_WITH_COMPAT #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) -.globl compat_sys_call_table -compat_sys_call_table: -#define compat_sys_sigsuspend sys_sigsuspend +const syscall_fn compat_sys_call_table[] = { #include <asm/syscall_table_32.h> -#endif +}; +#endif /* CONFIG_COMPAT */ diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh deleted file mode 100644 index c7ac3ed657c4..000000000000 --- a/arch/powerpc/kernel/systbl_chk.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0-or-later -# -# Just process the CPP output from systbl_chk.c and complain -# if anything is out of order. -# -# Copyright © 2008 IBM Corporation -# - -awk 'BEGIN { num = -1; } # Ignore the beginning of the file - /^#/ { next; } - /^[ \t]*$/ { next; } - /^START_TABLE/ { num = 0; next; } - /^END_TABLE/ { - if (num != $2) { - printf "Error: NR_syscalls (%s) is not one more than the last syscall (%s)\n", - $2, num - 1; - exit(1); - } - num = -1; # Ignore the rest of the file - } - { - if (num == -1) next; - if (($1 != -1) && ($1 != num)) { - printf "Error: Syscall %s out of order (expected %s)\n", - $1, num; - exit(1); - }; - num++; - }' "$1" diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 828d0f4106d2..cba6dd15de3b 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -200,7 +200,7 @@ static int __init TAU_init(void) tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) && !strcmp(cur_cpu_spec->platform, "ppc750"); - tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1); + tau_workq = alloc_ordered_workqueue("tau", 0); if (!tau_workq) return -ENOMEM; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 587adcc12860..8224381c1dba 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -71,11 +71,11 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/mce.h> +#include <asm/systemcfg.h> /* powerpc clocksource/clockevent code */ #include <linux/clockchips.h> -#include <linux/timekeeper_internal.h> static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { @@ -130,7 +130,7 @@ unsigned long tb_ticks_per_jiffy; unsigned long tb_ticks_per_usec = 100; /* sane default */ EXPORT_SYMBOL(tb_ticks_per_usec); unsigned long tb_ticks_per_sec; -EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */ +EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime conversions */ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); @@ -151,21 +151,6 @@ bool tb_invalid; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* - * Factor for converting from cputime_t (timebase ticks) to - * microseconds. This is stored as 0.64 fixed-point binary fraction. - */ -u64 __cputime_usec_factor; -EXPORT_SYMBOL(__cputime_usec_factor); - -static void calc_cputime_factors(void) -{ - struct div_result res; - - div128_by_32(1000000, 0, tb_ticks_per_sec, &res); - __cputime_usec_factor = res.result_low; -} - -/* * Read the SPURR on systems that have it, otherwise the PURR, * or if that doesn't exist return the timebase value passed in. 
*/ @@ -178,92 +163,6 @@ static inline unsigned long read_spurr(unsigned long tb) return tb; } -#ifdef CONFIG_PPC_SPLPAR - -#include <asm/dtl.h> - -void (*dtl_consumer)(struct dtl_entry *, u64); - -/* - * Scan the dispatch trace log and count up the stolen time. - * Should be called with interrupts disabled. - */ -static u64 scan_dispatch_log(u64 stop_tb) -{ - u64 i = local_paca->dtl_ridx; - struct dtl_entry *dtl = local_paca->dtl_curr; - struct dtl_entry *dtl_end = local_paca->dispatch_log_end; - struct lppaca *vpa = local_paca->lppaca_ptr; - u64 tb_delta; - u64 stolen = 0; - u64 dtb; - - if (!dtl) - return 0; - - if (i == be64_to_cpu(vpa->dtl_idx)) - return 0; - while (i < be64_to_cpu(vpa->dtl_idx)) { - dtb = be64_to_cpu(dtl->timebase); - tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) + - be32_to_cpu(dtl->ready_to_enqueue_time); - barrier(); - if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) { - /* buffer has overflowed */ - i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG; - dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); - continue; - } - if (dtb > stop_tb) - break; - if (dtl_consumer) - dtl_consumer(dtl, i); - stolen += tb_delta; - ++i; - ++dtl; - if (dtl == dtl_end) - dtl = local_paca->dispatch_log; - } - local_paca->dtl_ridx = i; - local_paca->dtl_curr = dtl; - return stolen; -} - -/* - * Accumulate stolen time by scanning the dispatch trace log. - * Called on entry from user mode. - */ -void notrace accumulate_stolen_time(void) -{ - u64 sst, ust; - struct cpu_accounting_data *acct = &local_paca->accounting; - - sst = scan_dispatch_log(acct->starttime_user); - ust = scan_dispatch_log(acct->starttime); - acct->stime -= sst; - acct->utime -= ust; - acct->steal_time += ust + sst; -} - -static inline u64 calculate_stolen_time(u64 stop_tb) -{ - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) - return 0; - - if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) - return scan_dispatch_log(stop_tb); - - return 0; -} - -#else /* CONFIG_PPC_SPLPAR */ -static inline u64 calculate_stolen_time(u64 stop_tb) -{ - return 0; -} - -#endif /* CONFIG_PPC_SPLPAR */ - /* * Account time for a transition between system, hard irq * or soft irq state. @@ -322,7 +221,11 @@ static unsigned long vtime_delta(struct cpu_accounting_data *acct, *stime_scaled = vtime_delta_scaled(acct, now, stime); - *steal_time = calculate_stolen_time(now); + if (IS_ENABLED(CONFIG_PPC_SPLPAR) && + firmware_has_feature(FW_FEATURE_SPLPAR)) + *steal_time = pseries_calculate_stolen_time(now); + else + *steal_time = 0; return stime; } @@ -452,11 +355,30 @@ void vtime_flush(struct task_struct *tsk) acct->softirq_time = 0; } -#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -#define calc_cputime_factors() -#endif +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. 
+ */ +void vtime_task_switch(struct task_struct *prev) +{ + if (is_idle_task(prev)) + vtime_account_idle(prev); + else + vtime_account_kernel(prev); + + vtime_flush(prev); + + if (!IS_ENABLED(CONFIG_PPC64)) { + struct cpu_accounting_data *acct = get_accounting(current); + struct cpu_accounting_data *acct0 = get_accounting(prev); + + acct->starttime = acct0->starttime; + } +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ -void __delay(unsigned long loops) +void __no_kcsan __delay(unsigned long loops) { unsigned long start; @@ -477,7 +399,7 @@ void __delay(unsigned long loops) } EXPORT_SYMBOL(__delay); -void udelay(unsigned long usecs) +void __no_kcsan udelay(unsigned long usecs) { __delay(tb_ticks_per_usec * usecs); } @@ -614,22 +536,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) return; } - /* - * Ensure a positive value is written to the decrementer, or - * else some CPUs will continue to take decrementer exceptions. - * When the PPC_WATCHDOG (decrementer based) is configured, - * keep this at most 31 bits, which is about 4 seconds on most - * systems, which gives the watchdog a chance of catching timer - * interrupt hard lockups. - */ - if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) - set_dec(0x7fffffff); - else - set_dec(decrementer_max); - /* Conditionally hard-enable interrupts. */ - if (should_hard_irq_enable()) + if (should_hard_irq_enable(regs)) { + /* + * Ensure a positive value is written to the decrementer, or + * else some CPUs will continue to take decrementer exceptions. + * When the PPC_WATCHDOG (decrementer based) is configured, + * keep this at most 31 bits, which is about 4 seconds on most + * systems, which gives the watchdog a chance of catching timer + * interrupt hard lockups. + */ + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) + set_dec(0x7fffffff); + else + set_dec(decrementer_max); + do_hard_irq_enable(); + } #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) @@ -772,7 +695,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE unsigned int tcr; /* Clear any pending timer interrupts */ @@ -978,6 +901,38 @@ void secondary_cpu_time_init(void) register_decrementer_clockevent(smp_processor_id()); } +/* + * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit + * result. 
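A worked example of the base-2^32 long division may help (illustrative): dividing 2^96 by 3 splits the dividend into digits a=1, b=c=d=0, and each step divides (remainder << 32) + next digit, carrying the remainder rightwards:

	w = 1 / 3           = 0,          rem 1
	x = 0x100000000 / 3 = 0x55555555, rem 1	/* do_div(ra, 3) */
	y = 0x100000000 / 3 = 0x55555555, rem 1	/* do_div(rb, 3) */
	z = 0x100000000 / 3 = 0x55555555, rem 1	/* do_div(rc, 3) */

so result_high = 0x0000000055555555 and result_low = 0x5555555555555555, i.e. floor(2^96 / 3).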
+ */ +static __init void div128_by_32(u64 dividend_high, u64 dividend_low, + unsigned int divisor, struct div_result *dr) +{ + unsigned long a, b, c, d; + unsigned long w, x, y, z; + u64 ra, rb, rc; + + a = dividend_high >> 32; + b = dividend_high & 0xffffffff; + c = dividend_low >> 32; + d = dividend_low & 0xffffffff; + + w = a / divisor; + ra = ((u64)(a - (w * divisor)) << 32) + b; + + rb = ((u64)do_div(ra, divisor) << 32) + c; + x = ra; + + rc = ((u64)do_div(rb, divisor) << 32) + d; + y = rb; + + do_div(rc, divisor); + z = rc; + + dr->result_high = ((u64)w << 32) + x; + dr->result_low = ((u64)y << 32) + z; +} + /* This function is only called on the boot processor */ void __init time_init(void) { @@ -986,7 +941,11 @@ void __init time_init(void) unsigned shift; /* Normal PowerPC with timebase register */ - ppc_md.calibrate_decr(); + if (ppc_md.calibrate_decr) + ppc_md.calibrate_decr(); + else + generic_calibrate_decr(); + printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", @@ -995,7 +954,6 @@ void __init time_init(void) tb_ticks_per_jiffy = ppc_tb_freq / HZ; tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; - calc_cputime_factors(); /* * Compute scale factor for sched_clock. @@ -1024,7 +982,10 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_k_arch_data->tb_ticks_per_sec = tb_ticks_per_sec; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; +#endif /* initialise and enable the large decrementer (if we have one) */ set_decrementer_max(); @@ -1045,39 +1006,6 @@ void __init time_init(void) enable_sched_clock_irqtime(); } -/* - * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit - * result. - */ -void div128_by_32(u64 dividend_high, u64 dividend_low, - unsigned divisor, struct div_result *dr) -{ - unsigned long a, b, c, d; - unsigned long w, x, y, z; - u64 ra, rb, rc; - - a = dividend_high >> 32; - b = dividend_high & 0xffffffff; - c = dividend_low >> 32; - d = dividend_low & 0xffffffff; - - w = a / divisor; - ra = ((u64)(a - (w * divisor)) << 32) + b; - - rb = ((u64) do_div(ra, divisor) << 32) + c; - x = ra; - - rc = ((u64) do_div(rb, divisor) << 32) + d; - y = rb; - - do_div(rc, divisor); - z = rc; - - dr->result_high = ((u64)w << 32) + x; - dr->result_low = ((u64)y << 32) + z; - -} - /* We don't need to calibrate delay, we use the CPU timebase for that */ void calibrate_delay(void) { diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 5a0f023a26e9..a9cd6507163a 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -6,13 +6,13 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. */ +#include <linux/export.h> #include <asm/asm-offsets.h> #include <asm/ppc_asm.h> #include <asm/ppc-opcode.h> #include <asm/ptrace.h> #include <asm/reg.h> #include <asm/bug.h> -#include <asm/export.h> #include <asm/feature-fixups.h> #ifdef CONFIG_VSX @@ -117,7 +117,7 @@ _GLOBAL(tm_reclaim) std r2, STK_GOT(r1) stdu r1, -TM_FRAME_SIZE(r1) - /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */ + /* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */ std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) @@ -222,7 +222,7 @@ _GLOBAL(tm_reclaim) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is NOT a pt_regs ptr! 
*/ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ @@ -359,7 +359,7 @@ _GLOBAL(__tm_recheckpoint) stdu r1, -TM_FRAME_SIZE(r1) /* - * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. + * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. * This is used for backing up the NVGPRs: */ SAVE_NVGPRS(r1) @@ -379,7 +379,7 @@ _GLOBAL(__tm_recheckpoint) * Make r7 look like an exception frame so that we can use the neat * GPRx(n) macros. r7 is now NOT a pt_regs ptr! */ - subi r7, r7, STACK_FRAME_OVERHEAD + subi r7, r7, STACK_INT_FRAME_REGS /* We need to setup MSR for FP/VMX/VSX register save instructions. */ mfmsr r6 diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index af8527538fe4..d6c3885453bd 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -6,15 +6,18 @@ ifdef CONFIG_FUNCTION_TRACER # do not trace tracer code CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o -ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o +ifdef CONFIG_FUNCTION_TRACER +obj32-y += ftrace.o ftrace_entry.o +ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),) +obj64-y += ftrace_64_pg.o ftrace_64_pg_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o +obj64-y += ftrace.o ftrace_entry.o +endif endif -obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o ftrace.o + obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) @@ -23,4 +26,9 @@ obj-$(CONFIG_PPC32) += $(obj32-y) # Disable GCOV, KCOV & sanitizers in odd or sensitive code GCOV_PROFILE_ftrace.o := n KCOV_INSTRUMENT_ftrace.o := n +KCSAN_SANITIZE_ftrace.o := n UBSAN_SANITIZE_ftrace.o := n +GCOV_PROFILE_ftrace_64_pg.o := n +KCOV_INSTRUMENT_ftrace_64_pg.o := n +KCSAN_SANITIZE_ftrace_64_pg.o := n +UBSAN_SANITIZE_ftrace_64_pg.o := n diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2a893e06e4f1..6dca92d5a6e8 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -23,205 +23,85 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> +#include <asm/sections.h> -/* - * We generally only have a single long_branch tramp and at most 2 or 3 plt - * tramps generated. But, we don't use the plt tramps currently. We also allot - * 2 tramps after .text and .init.text. So, we only end up with around 3 usable - * tramps in total. Set aside 8 just to be sure. 
- */ -#define NUM_FTRACE_TRAMPS 8 +#define NUM_FTRACE_TRAMPS 2 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static ppc_inst_t -ftrace_call_replace(unsigned long ip, unsigned long addr, int link) +unsigned long ftrace_call_adjust(unsigned long addr) { - ppc_inst_t op; + if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) + return 0; - addr = ppc_function_entry((void *)addr); + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } - /* if (link) set op to 'bl' else 'b' */ + return addr; +} + +static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link) +{ + ppc_inst_t op; + + WARN_ON(!is_offset_in_branch_range(addr - ip)); create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0); return op; } -static inline int -ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) +static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op) { - ppc_inst_t replaced; - - /* - * Note: - * We are paranoid about modifying text, as if a bug was to happen, it - * could cause us to read or write to someplace that could cause harm. - * Carefully read and modify the code with probe_kernel_*(), and make - * sure what we read is what we expected it to be before modifying it. - */ - - /* read the text we want to modify */ - if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) + if (copy_inst_from_kernel_nofault(op, (void *)ip)) { + pr_err("0x%lx: fetching instruction failed\n", ip); return -EFAULT; - - /* Make sure it is what we expect it to be */ - if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%s) != old (%s)", - (void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old)); - return -EINVAL; } - /* replace the text with the new text */ - return patch_instruction((u32 *)ip, new); + return 0; } -/* - * Helper functions that are the same for both PPC64 and PPC32. 
- */ -static int test_24bit_addr(unsigned long ip, unsigned long addr) +static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst) { - addr = ppc_function_entry((void *)addr); - - return is_offset_in_branch_range(addr - ip); -} + ppc_inst_t op; + int ret; -static int is_bl_op(ppc_inst_t op) -{ - return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); -} + ret = ftrace_read_inst(ip, &op); + if (!ret && !ppc_inst_equal(op, inst)) { + pr_err("0x%lx: expected (%08lx) != found (%08lx)\n", + ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(op)); + ret = -EINVAL; + } -static int is_b_op(ppc_inst_t op) -{ - return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0); + return ret; } -static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) +static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) { - int offset; + int ret = ftrace_validate_inst(ip, old); - offset = PPC_LI(ppc_inst_val(op)); - /* make it signed */ - if (offset & 0x02000000) - offset |= 0xfe000000; + if (!ret && !ppc_inst_equal(old, new)) + ret = patch_instruction((u32 *)ip, new); - return ip + (long)offset; + return ret; } -#ifdef CONFIG_MODULES -static int -__ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long entry, ptr, tramp; - unsigned long ip = rec->ip; - ppc_inst_t op, pop; - - /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } - - /* Make sure that that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); - return -EINVAL; - } - - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - - if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { - if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { - pr_err("Fetching instruction at %lx failed.\n", ip - 4); - return -EFAULT; - } - - /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && - !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %s around bl _mcount\n", - ppc_inst_as_str(op)); - return -EINVAL; - } - } else if (IS_ENABLED(CONFIG_PPC64)) { - /* - * Check what is in the next instruction. We can see ld r2,40(r1), but - * on first pass after boot we will see mflr r0. - */ - if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { - pr_err("Fetching op failed.\n"); - return -EFAULT; - } - - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08lx found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op)); - return -EINVAL; - } - } - - /* - * When using -mprofile-kernel or PPC32 there is no load to jump over. - * - * Otherwise our original call site looks like: - * - * bl <tramp> - * ld r2,XX(r1) - * - * Milton Miller pointed out that we can not simply nop the branch. - * If a task was preempted when calling a trace function, the nops - * will remove the way to restore the TOC in r2 and the r2 TOC will - * get corrupted. - * - * Use a b +8 to jump over the load. 
- */ - if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) - pop = ppc_inst(PPC_RAW_NOP()); - else - pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */ - - if (patch_instruction((u32 *)ip, pop)) { - pr_err("Patching NOP failed.\n"); - return -EPERM; - } - - return 0; -} -#else -static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +static int is_bl_op(ppc_inst_t op) { - return 0; + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); } -#endif /* CONFIG_MODULES */ static unsigned long find_ftrace_tramp(unsigned long ip) { int i; - /* - * We have the compiler generated long_branch tramps at the end - * and we prefer those - */ - for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) if (!ftrace_tramps[i]) continue; else if (is_offset_in_branch_range(ftrace_tramps[i] - ip)) @@ -230,448 +110,418 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } -static int add_ftrace_tramp(unsigned long tramp) +#ifdef CONFIG_MODULES +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) { - int i; + struct module *mod = NULL; - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) { - ftrace_tramps[i] = tramp; - return 0; - } + scoped_guard(rcu) + mod = __module_text_address(ip); + if (!mod) + pr_err("No module loaded at addr=%lx\n", ip); - return -1; + return (addr == (unsigned long)ftrace_caller ? mod->arch.tramp : mod->arch.tramp_regs); } - -/* - * If this is a compiler generated long_branch trampoline (essentially, a - * trampoline that has a branch to _mcount()), we re-write the branch to - * instead go to ftrace_[regs_]caller() and note down the location of this - * trampoline. - */ -static int setup_mcount_compiler_tramp(unsigned long tramp) +#else +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) { - int i; - ppc_inst_t op; - unsigned long ptr; - - /* Is this a known long jump tramp? */ - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (ftrace_tramps[i] == tramp) - return 0; - - /* New trampoline -- read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { - pr_debug("Fetching opcode failed.\n"); - return -1; - } + return 0; +} +#endif - /* Is this a 24 bit branch? 
*/ - if (!is_b_op(op)) { - pr_debug("Trampoline is not a long branch tramp.\n"); - return -1; - } +static unsigned long ftrace_get_ool_stub(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + return rec->arch.ool_stub; +#else + BUILD_BUG(); +#endif +} - /* lets find where the pointer goes */ - ptr = find_bl_target(tramp, op); +static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) +{ + unsigned long ip; + unsigned long stub; - if (ptr != ppc_global_function_entry((void *)_mcount)) { - pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr); - return -1; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + else + ip = rec->ip; + + if (!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR && + addr != FTRACE_REGS_ADDR) { + /* This can only happen with ftrace direct */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n", + ip, rec->ip, addr); + return -EINVAL; + } + addr = FTRACE_ADDR; } - /* Let's re-write the tramp to go to ftrace_[regs_]caller */ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - ptr = ppc_global_function_entry((void *)ftrace_regs_caller); + if (is_offset_in_branch_range(addr - ip)) + /* Within range */ + stub = addr; + else if (core_kernel_text(ip)) + /* We would be branching to one of our ftrace stubs */ + stub = find_ftrace_tramp(ip); else - ptr = ppc_global_function_entry((void *)ftrace_caller); - - if (patch_branch((u32 *)tramp, ptr, 0)) { - pr_debug("REL24 out of range!\n"); - return -1; - } + stub = ftrace_lookup_module_stub(ip, addr); - if (add_ftrace_tramp(tramp)) { - pr_debug("No tramp locations left\n"); - return -1; + if (!stub) { + pr_err("0x%lx (0x%lx): No ftrace stubs reachable\n", ip, rec->ip); + return -EINVAL; } + *call_inst = ftrace_create_branch_inst(ip, stub, 1); return 0; } -static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) +static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) { - unsigned long tramp, ip = rec->ip; - ppc_inst_t op; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index; + int ret = 0, ool_stub_count, *ool_stub_index; + ppc_inst_t inst; + /* + * See ftrace_entry.S if changing the below instruction sequence, as we rely on + * decoding the last branch instruction here to recover the correct function ip. + */ + struct ftrace_ool_stub *ool_stub, ool_stub_template = { + .insn = { + PPC_RAW_MFLR(_R0), + PPC_RAW_NOP(), /* bl ftrace_caller */ + PPC_RAW_MTLR(_R0), + PPC_RAW_NOP() /* b rec->ip + 4 */ + } + }; - /* Read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } + WARN_ON(rec->arch.ool_stub); - /* Make sure that that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); + if (is_kernel_inittext(rec->ip)) { + ool_stub = ftrace_ool_stub_inittext; + ool_stub_index = &ool_stub_inittext_index; + ool_stub_count = ftrace_ool_stub_inittext_count; + } else if (is_kernel_text(rec->ip)) { + /* + * ftrace records are sorted, so we first use up the stub area within .text + * (ftrace_ool_stub_text) before using the area at the end of .text + * (ftrace_ool_stub_text_end), unless the stub is out of range of the record. 
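For orientation, the patched control flow around the four-instruction template above looks like this (a sketch; the labels are hypothetical):

	func:
		b	ool_stub	/* patched over the ftrace location */
		next_insn		/* execution continues here afterwards */
		...
	ool_stub:
		mflr	r0		/* save LR, as the inline sequence would */
		bl	ftrace_caller	/* the actual traced call */
		mtlr	r0		/* restore LR */
		b	func + 4	/* back to next_insn */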
+ */ + if (ool_stub_text_index >= ftrace_ool_stub_text_count || + !is_offset_in_branch_range((long)rec->ip - + (long)&ftrace_ool_stub_text[ool_stub_text_index])) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; + } else { + ool_stub = ftrace_ool_stub_text; + ool_stub_index = &ool_stub_text_index; + ool_stub_count = ftrace_ool_stub_text_count; + } +#ifdef CONFIG_MODULES + } else if (mod) { + ool_stub = mod->arch.ool_stubs; + ool_stub_index = &mod->arch.ool_stub_index; + ool_stub_count = mod->arch.ool_stub_count; +#endif + } else { return -EINVAL; } - /* Let's find where the pointer goes */ - tramp = find_bl_target(ip, op); + ool_stub += (*ool_stub_index)++; - pr_devel("ip:%lx jumps to %lx", ip, tramp); + if (WARN_ON(*ool_stub_index > ool_stub_count)) + return -EINVAL; - if (setup_mcount_compiler_tramp(tramp)) { - /* Are other trampolines reachable? */ - if (!find_ftrace_tramp(ip)) { - pr_err("No ftrace trampolines reachable from %ps\n", - (void *)ip); - return -EINVAL; - } + if (!is_offset_in_branch_range((long)rec->ip - (long)&ool_stub->insn[0]) || + !is_offset_in_branch_range((long)(rec->ip + MCOUNT_INSN_SIZE) - + (long)&ool_stub->insn[3])) { + pr_err("%s: ftrace ool stub out of range (%p -> %p).\n", + __func__, (void *)rec->ip, (void *)&ool_stub->insn[0]); + return -EINVAL; } - if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Patching NOP failed.\n"); - return -EPERM; - } + rec->arch.ool_stub = (unsigned long)&ool_stub->insn[0]; - return 0; + /* bl ftrace_caller */ + if (!mod) + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &inst); +#ifdef CONFIG_MODULES + else + /* + * We can't use ftrace_get_call_inst() since that uses + * __module_text_address(rec->ip) to look up the module. + * But, since the module is not fully formed at this stage, + * the lookup fails. We know the target though, so generate + * the branch inst directly. + */ + inst = ftrace_create_branch_inst(ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE, + mod->arch.tramp, 1); +#endif + ool_stub_template.insn[1] = ppc_inst_val(inst); + + /* b rec->ip + 4 */ + if (!ret && create_branch(&inst, &ool_stub->insn[3], rec->ip + MCOUNT_INSN_SIZE, 0)) + return -EINVAL; + ool_stub_template.insn[3] = ppc_inst_val(inst); + + if (!ret) + ret = patch_instructions((u32 *)ool_stub, (u32 *)&ool_stub_template, + sizeof(ool_stub_template), false); + + return ret; +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + BUILD_BUG(); +#endif } -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *powerpc_rec_get_ops(struct dyn_ftrace *rec) { - unsigned long ip = rec->ip; - ppc_inst_t old, new; + const struct ftrace_ops *ops = NULL; - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ftrace_call_replace(ip, addr, 1); - new = ppc_inst(PPC_RAW_NOP()); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - return __ftrace_make_nop_kernel(rec, addr); - } else if (!IS_ENABLED(CONFIG_MODULES)) { - return -EINVAL; + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); } - /* - * Out of range jumps are called from modules. 
- * We should either already have a pointer to the module - * or it has been passed in. - */ - if (!rec->arch.mod) { - if (!mod) { - pr_err("No module loaded addr=%lx\n", addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - pr_err("Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; + if (!ops) + ops = &ftrace_list_ops; - return __ftrace_make_nop(mod, rec, addr); + return ops; } -#ifdef CONFIG_MODULES -/* - * Examine the existing instructions for __ftrace_make_call. - * They should effectively be a NOP, and follow formal constraints, - * depending on the ABI. Return false if they don't. - */ -static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) +static int ftrace_rec_set_ops(struct dyn_ftrace *rec, const struct ftrace_ops *ops) { - if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) - return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) && - ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC)); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return patch_ulong((void *)(ftrace_get_ool_stub(rec) - sizeof(unsigned long)), + (unsigned long)ops); else - return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); + return patch_ulong((void *)(rec->ip - MCOUNT_INSN_SIZE - sizeof(unsigned long)), + (unsigned long)ops); } -static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { - ppc_inst_t op[2]; - void *ip = (void *)rec->ip; - unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; - - /* read where this goes */ - if (copy_inst_from_kernel_nofault(op, ip)) - return -EFAULT; - - if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1) && - copy_inst_from_kernel_nofault(op + 1, ip + 4)) - return -EFAULT; - - if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %s %s\n", - ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1])); - return -EINVAL; - } - - /* If we never set up ftrace trampoline(s), then bail */ - if (!mod->arch.tramp || - (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) { - pr_err("No ftrace trampoline\n"); - return -EINVAL; - } - - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS) - tramp = mod->arch.tramp_regs; - else - tramp = mod->arch.tramp; - - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - - if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} - return 0; +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, powerpc_rec_get_ops(rec)); } #else -static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - return 0; + /* This should never be called since we override ftrace_replace_code() */ 
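To visualize what ftrace_rec_set_ops() above patches (a sketch of the assumed site layout with CALL_OPS enabled and no out-of-line stub): the ftrace_ops pointer sits in a literal directly before the patch site, so switching ops is a single patch_ulong() rather than a global table update:

	.8byte	<ftrace_ops *>		/* rec->ip - MCOUNT_INSN_SIZE - 8 */
	mflr	r0			/* rec->ip - MCOUNT_INSN_SIZE */
	bl	ftrace_caller		/* rec->ip */

ftrace_caller can then locate the ops for this call site relative to the return address, instead of walking a shared ops list.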
+ WARN_ON(1); + return -EINVAL; } -#endif /* CONFIG_MODULES */ +#endif -static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - ppc_inst_t op; - void *ip = (void *)rec->ip; - unsigned long tramp, entry, ptr; - - /* Make sure we're being asked to patch branch to a known ftrace addr */ - entry = ppc_global_function_entry((void *)ftrace_caller); - ptr = ppc_global_function_entry((void *)addr); - - if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - entry = ppc_global_function_entry((void *)ftrace_regs_caller); + ppc_inst_t old, new; + unsigned long ip = rec->ip; + int ret = 0; - if (ptr != entry) { - pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); + /* This can only ever be called during module load */ + if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(ip))) return -EINVAL; - } - /* Make sure we have a nop */ - if (copy_inst_from_kernel_nofault(&op, ip)) { - pr_err("Unable to read ftrace location %p\n", ip); - return -EFAULT; + old = ppc_inst(PPC_RAW_NOP()); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &old); } - if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op)); - return -EINVAL; - } + ret |= ftrace_get_call_inst(rec, addr, &new); - tramp = find_ftrace_tramp((unsigned long)ip); - if (!tramp) { - pr_err("No ftrace trampolines reachable from %ps\n", ip); - return -EINVAL; - } + if (!ret) + ret = ftrace_modify_code(ip, old, new); - if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { - pr_err("Error patching branch to ftrace tramp!\n"); - return -EINVAL; - } + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; - return 0; + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), + ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); + + return ret; } -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned long ip = rec->ip; - ppc_inst_t old, new; - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. + * This should never be called since we override ftrace_replace_code(), + * as well as ftrace_init_nop() */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ppc_inst(PPC_RAW_NOP()); - new = ftrace_call_replace(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - return __ftrace_make_call_kernel(rec, addr); - } else if (!IS_ENABLED(CONFIG_MODULES)) { - /* We should not get here without modules */ - return -EINVAL; - } - - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. 
- */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - - return __ftrace_make_call(rec, addr); + WARN_ON(1); + return -EINVAL; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -#ifdef CONFIG_MODULES -static int -__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) +void ftrace_replace_code(int enable) { - ppc_inst_t op; - unsigned long ip = rec->ip; - unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; - - /* If we never set up ftrace trampolines, then bail */ - if (!mod->arch.tramp || !mod->arch.tramp_regs) { - pr_err("No ftrace trampoline\n"); - return -EINVAL; - } + ppc_inst_t old, new, call_inst, new_call_inst; + ppc_inst_t nop_inst = ppc_inst(PPC_RAW_NOP()); + unsigned long ip, new_addr, addr; + struct ftrace_rec_iter *iter; + struct dyn_ftrace *rec; + int ret = 0, update; - /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } - - /* Make sure that that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op)); - return -EINVAL; - } + for_ftrace_rec_iter(iter) { + rec = ftrace_rec_iter_record(iter); + ip = rec->ip; - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - entry = ppc_global_function_entry((void *)old_addr); + if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED)) + continue; - pr_devel("ip:%lx jumps to %lx", ip, tramp); + addr = ftrace_get_addr_curr(rec); + new_addr = ftrace_get_addr_new(rec); + update = ftrace_update_record(rec, enable); - if (tramp != entry) { - /* old_addr is not within range, so we must have used a trampoline */ - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && update != FTRACE_UPDATE_IGNORE) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &nop_inst); + if (ret) + goto out; } - pr_devel("trampoline target %lx", ptr); - - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; + switch (update) { + case FTRACE_UPDATE_IGNORE: + default: + continue; + case FTRACE_UPDATE_MODIFY_CALL: + ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst); + ret |= ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); + old = call_inst; + new = new_call_inst; + break; + case FTRACE_UPDATE_MAKE_NOP: + ret = ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_set_nop_ops(rec); + old = call_inst; + new = nop_inst; + break; + case FTRACE_UPDATE_MAKE_CALL: + ret = ftrace_get_call_inst(rec, new_addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); + old = nop_inst; + new = call_inst; + break; } - } - /* The new target may be within range */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } + if (!ret) + ret = ftrace_modify_code(ip, old, new); - return 0; - } + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && + (update == FTRACE_UPDATE_MAKE_NOP || update == FTRACE_UPDATE_MAKE_CALL)) { + /* Update the actual ftrace location */ + call_inst = ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - + (long)rec->ip)); + nop_inst = ppc_inst(PPC_RAW_NOP()); + ip = rec->ip; - 
if (rec->flags & FTRACE_FL_REGS) - tramp = mod->arch.tramp_regs; - else - tramp = mod->arch.tramp; + if (update == FTRACE_UPDATE_MAKE_NOP) + ret = ftrace_modify_code(ip, call_inst, nop_inst); + else + ret = ftrace_modify_code(ip, nop_inst, call_inst); - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); + if (ret) + goto out; + } - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; + if (ret) + goto out; } - if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } - - return 0; -} -#else -static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) -{ - return 0; +out: + if (ret) + ftrace_bug(ret, rec); + return; } -#endif -int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { - unsigned long ip = rec->ip; + unsigned long addr, ip = rec->ip; ppc_inst_t old, new; - - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { - /* within range */ - old = ftrace_call_replace(ip, old_addr, 1); - new = ftrace_call_replace(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - /* - * We always patch out of range locations to go to the regs - * variant, so there is nothing to do here - */ - return 0; - } else if (!IS_ENABLED(CONFIG_MODULES)) { - /* We should not get here without modules */ + int ret = 0; + + /* Verify instructions surrounding the ftrace location */ + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + /* Expect nops */ + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); + if (!ret) + ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP())); + } else if (IS_ENABLED(CONFIG_PPC32)) { + /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ + ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), + ppc_inst(PPC_RAW_NOP())); + } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { + /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ + ret = ftrace_read_inst(ip - 4, &old); + if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { + /* Gcc v5.x emit the additional 'std' instruction, gcc v6.x don't */ + ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), + ppc_inst(PPC_RAW_NOP())); + } + } else { return -EINVAL; } - /* - * Out of range jumps are called from modules. 
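
ftrace_init_nop() above refuses to touch a site whose surroundings do not match the compiler-generated profiling sequence. A minimal user-space sketch of that validation, assuming the standard ppc64 encodings 0x7c0802a6 (mflr r0) and 0xf8010010 (std r0,16(r1)):

#include <stdint.h>
#include <stdio.h>

#define MFLR_R0     0x7c0802a6u
#define STD_R0_16R1 0xf8010010u

static int validate_inst(const uint32_t *ip, uint32_t expected)
{
	return *ip == expected ? 0 : -22; /* -EINVAL on mismatch */
}

int main(void)
{
	/* -mprofile-kernel prologue: mflr r0; std r0,16(r1); bl _mcount */
	uint32_t text[] = { MFLR_R0, STD_R0_16R1, 0x48000001u };

	if (validate_inst(&text[0], MFLR_R0) ||
	    validate_inst(&text[1], STD_R0_16R1)) {
		puts("unexpected prologue, refusing to patch");
		return 1;
	}
	puts("prologue matches, safe to nop-out the std and the bl");
	return 0;
}
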
- */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); + if (ret) + return ret; + + /* Set up out-of-line stub */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return ftrace_init_ool_stub(mod, rec); + + /* Nop-out the ftrace location */ + new = ppc_inst(PPC_RAW_NOP()); + addr = MCOUNT_ADDR; + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + /* we instead patch-in the 'mflr r0' */ + old = ppc_inst(PPC_RAW_NOP()); + new = ppc_inst(PPC_RAW_MFLR(_R0)); + ret = ftrace_modify_code(ip - 4, old, new); + } else if (is_offset_in_branch_range(addr - ip)) { + /* Within range */ + old = ftrace_create_branch_inst(ip, addr, 1); + ret = ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) { + /* + * We would be branching to a linker-generated stub, or to the module _mcount + * stub. Let's just confirm we have a 'bl' here. + */ + ret = ftrace_read_inst(ip, &old); + if (ret) + return ret; + if (!is_bl_op(old)) { + pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old)); + return -EINVAL; + } + ret = patch_instruction((u32 *)ip, new); + } else { return -EINVAL; } - return __ftrace_modify_call(rec, old_addr, addr); + return ret; } -#endif int ftrace_update_ftrace_func(ftrace_func_t func) { @@ -679,15 +529,22 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ppc_inst_t old, new; int ret; + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + old = ppc_inst_read((u32 *)&ftrace_call); - new = ftrace_call_replace(ip, (unsigned long)func, 1); + new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); /* Also update the regs callback function */ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) { ip = (unsigned long)(&ftrace_regs_call); old = ppc_inst_read((u32 *)&ftrace_regs_call); - new = ftrace_call_replace(ip, (unsigned long)func, 1); + new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); } @@ -703,11 +560,6 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); } -#ifdef CONFIG_PPC64 -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; - void ftrace_free_init_tramp(void) { int i; @@ -719,84 +571,91 @@ void ftrace_free_init_tramp(void) } } -int __init ftrace_dyn_arch_init(void) +static void __init add_ftrace_tramp(unsigned long tramp) { int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) { + ftrace_tramps[i] = tramp; + return; + } +} + +int __init ftrace_dyn_arch_init(void) +{ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; + unsigned long addr = FTRACE_REGS_ADDR; + long reladdr; + int i; u32 stub_insns[] = { - PPC_RAW_LD(_R12, _R13, PACATOC), +#ifdef CONFIG_PPC_KERNEL_PCREL + /* pla r12,addr */ + PPC_PREFIX_MLS | __PPC_PRFX_R(1), + PPC_INST_PADDI | ___PPC_RT(_R12), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() +#elif defined(CONFIG_PPC64) + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), PPC_RAW_ADDIS(_R12, _R12, 0), PPC_RAW_ADDI(_R12, _R12, 0), PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR() +#else + PPC_RAW_LIS(_R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() +#endif }; - unsigned long addr; - long reladdr; - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - addr = 
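
The is_offset_in_branch_range() test used above reflects the reach of a ppc I-form branch. A self-contained sketch of the same check:

#include <stdbool.h>
#include <stdio.h>

/* An I-form branch carries a 24-bit LI field shifted left by 2, giving a
 * signed, word-aligned displacement of roughly +/-32 MiB. */
static bool offset_in_branch_range(long offset)
{
	return offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3);
}

int main(void)
{
	printf("%d %d %d\n",
	       offset_in_branch_range(0x1fffffc),   /* 1: top of range */
	       offset_in_branch_range(0x2000000),   /* 0: one word too far */
	       offset_in_branch_range(-0x2000000)); /* 1: bottom of range */
	return 0;
}
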
ppc_global_function_entry((void *)ftrace_regs_caller); - else - addr = ppc_global_function_entry((void *)ftrace_caller); + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + for (i = 0; i < 2; i++) { + reladdr = addr - (unsigned long)tramp[i]; - reladdr = addr - kernel_toc_addr(); + if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) { + pr_err("Address of %ps out of range of pcrel address.\n", + (void *)addr); + return -1; + } - if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) { - pr_err("Address of %ps out of range of kernel_toc.\n", + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][0] |= IMM_H18(reladdr); + tramp[i][1] |= IMM_L(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + } else if (IS_ENABLED(CONFIG_PPC64)) { + reladdr = addr - kernel_toc_addr(); + + if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) { + pr_err("Address of %ps out of range of kernel_toc.\n", (void *)addr); - return -1; - } + return -1; + } - for (i = 0; i < 2; i++) { - memcpy(tramp[i], stub_insns, sizeof(stub_insns)); - tramp[i][1] |= PPC_HA(reladdr); - tramp[i][2] |= PPC_LO(reladdr); - add_ftrace_tramp((unsigned long)tramp[i]); + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][1] |= PPC_HA(reladdr); + tramp[i][2] |= PPC_LO(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + } else { + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][0] |= PPC_HA(addr); + tramp[i][1] |= PPC_LO(addr); + add_ftrace_tramp((unsigned long)tramp[i]); + } } return 0; } -#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER - -extern void ftrace_graph_call(void); -extern void ftrace_graph_stub(void); - -static int ftrace_modify_ftrace_graph_caller(bool enable) -{ - unsigned long ip = (unsigned long)(&ftrace_graph_call); - unsigned long addr = (unsigned long)(&ftrace_graph_caller); - unsigned long stub = (unsigned long)(&ftrace_graph_stub); - ppc_inst_t old, new; - - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) - return 0; - - old = ftrace_call_replace(ip, enable ? stub : addr, 0); - new = ftrace_call_replace(ip, enable ? addr : stub, 0); - - return ftrace_modify_code(ip, old, new); -} - -int ftrace_enable_ftrace_graph_caller(void) -{ - return ftrace_modify_ftrace_graph_caller(true); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - return ftrace_modify_ftrace_graph_caller(false); -} - -/* - * Hook the return address and push it in the stack of return addrs - * in current thread info. Return the address we want to divert to. 
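
The stub patching above splits a relative address into a high-adjusted and a low half because addi sign-extends its immediate. A small, verifiable sketch of the PPC_HA()/PPC_LO() arithmetic; the names mirror the kernel macros and the sample value is arbitrary:

#include <stdint.h>
#include <stdio.h>

static uint16_t ppc_hi(uint32_t v) { return v >> 16; }
static uint16_t ppc_lo(uint32_t v) { return v & 0xffff; }
/* Round the high half up when bit 15 of the low half is set, so the
 * sign-extended addi immediate lands back on the right value. */
static uint16_t ppc_ha(uint32_t v) { return ppc_hi(v + 0x8000); }

int main(void)
{
	uint32_t reladdr = 0x1234fff0; /* example offset from the kernel TOC */
	int32_t rebuilt = ((int32_t)ppc_ha(reladdr) << 16) + (int16_t)ppc_lo(reladdr);

	/* addis r12,r12,PPC_HA(x); addi r12,r12,PPC_LO(x) recomputes x */
	printf("%08x == %08x\n", reladdr, (uint32_t)rebuilt);
	return 0;
}
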
- */
-static unsigned long
-__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-	unsigned long return_hooker;
-	int bit;
+	unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1];
 
 	if (unlikely(ftrace_graph_is_dead()))
 		goto out;
@@ -804,41 +663,10 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
 
-	bit = ftrace_test_recursion_trylock(ip, parent);
-	if (bit < 0)
-		goto out;
+	if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, fregs))
+		parent_ip = ppc_function_entry(return_to_handler);
 
-	return_hooker = ppc_function_entry(return_to_handler);
-
-	if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
-		parent = return_hooker;
-
-	ftrace_test_recursion_unlock(bit);
 out:
-	return parent;
+	arch_ftrace_regs(fregs)->regs.link = parent_ip;
 }
-
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
-void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
-		       struct ftrace_ops *op, struct ftrace_regs *fregs)
-{
-	fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]);
-}
-#else
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
-				    unsigned long sp)
-{
-	return __prepare_ftrace_return(parent, ip, sp);
-}
-#endif
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_PPC64_ELF_ABI_V1
-char *arch_ftrace_match_adjust(char *str, const char *search)
-{
-	if (str[0] == '.' && search[0] != '.')
-		return str + 1;
-	else
-		return str;
-}
-#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
new file mode 100644
index 000000000000..5c6e545d1708
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -0,0 +1,832 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/text-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS	8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+static ppc_inst_t
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+	ppc_inst_t op;
+
+	addr = ppc_function_entry((void *)addr);
+
+	/* if (link) set op to 'bl' else 'b' */
+	create_branch(&op, (u32 *)ip, addr, link ?
BRANCH_SET_LINK : 0); + + return op; +} + +static inline int +ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) +{ + ppc_inst_t replaced; + + /* + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. + */ + + /* read the text we want to modify */ + if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (!ppc_inst_equal(replaced, old)) { + pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, + ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); + return -EINVAL; + } + + /* replace the text with the new text */ + return patch_instruction((u32 *)ip, new); +} + +/* + * Helper functions that are the same for both PPC64 and PPC32. + */ +static int test_24bit_addr(unsigned long ip, unsigned long addr) +{ + addr = ppc_function_entry((void *)addr); + + return is_offset_in_branch_range(addr - ip); +} + +static int is_bl_op(ppc_inst_t op) +{ + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); +} + +static int is_b_op(ppc_inst_t op) +{ + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0); +} + +static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) +{ + int offset; + + offset = PPC_LI(ppc_inst_val(op)); + /* make it signed */ + if (offset & 0x02000000) + offset |= 0xfe000000; + + return ip + (long)offset; +} + +#ifdef CONFIG_MODULES +static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) +{ + struct module *mod; + + scoped_guard(rcu) + mod = __module_text_address(rec->ip); + if (!mod) + pr_err("No module loaded at addr=%lx\n", rec->ip); + + return mod; +} + +static int +__ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long entry, ptr, tramp; + unsigned long ip = rec->ip; + ppc_inst_t op, pop; + + if (!mod) { + mod = ftrace_lookup_module(rec); + if (!mod) + return -EINVAL; + } + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && + !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { + pr_err("Unexpected instruction %08lx around bl _mcount\n", + ppc_inst_as_ulong(op)); + return -EINVAL; + } + } else if (IS_ENABLED(CONFIG_PPC64)) { + /* + * Check what is in the next instruction. 
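
ftrace_call_replace(), is_bl_op() and find_bl_target() above all manipulate the same I-form branch layout. A standalone round-trip sketch, using the well-known 0x48000000 opcode and the 0x03fffffc LI mask:

#include <stdint.h>
#include <stdio.h>

#define PPC_LI_MASK 0x03fffffcu

/* b/bl are I-form: opcode 18 (0x48000000), a signed word-aligned
 * displacement in LI, and LK (bit 0) set for 'bl'. */
static uint32_t create_branch(long offset, int link)
{
	return 0x48000000u | ((uint32_t)offset & PPC_LI_MASK) | (link ? 1 : 0);
}

/* Masking LI off leaves opcode+AA+LK, so any 'bl' compares equal to 'bl .' */
static int is_bl_op(uint32_t op)
{
	return (op & ~PPC_LI_MASK) == 0x48000001u;
}

/* Mirror of find_bl_target(): pull LI back out and sign-extend it */
static long bl_target(unsigned long ip, uint32_t op)
{
	int offset = op & PPC_LI_MASK;

	if (offset & 0x02000000)
		offset |= 0xfe000000;
	return ip + (long)offset;
}

int main(void)
{
	unsigned long ip = 0x1000100;
	uint32_t op = create_branch(-0x100, 1); /* bl .-0x100 */

	/* prints is_bl=1 target=0x1000000 */
	printf("is_bl=%d target=%#lx\n", is_bl_op(op), bl_target(ip, op));
	return 0;
}
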
We can see ld r2,40(r1), but + * on first pass after boot we will see mflr r0. + */ + if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { + pr_err("Fetching op failed.\n"); + return -EFAULT; + } + + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { + pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, + ppc_inst_as_ulong(op)); + return -EINVAL; + } + } + + /* + * When using -mprofile-kernel or PPC32 there is no load to jump over. + * + * Otherwise our original call site looks like: + * + * bl <tramp> + * ld r2,XX(r1) + * + * Milton Miller pointed out that we can not simply nop the branch. + * If a task was preempted when calling a trace function, the nops + * will remove the way to restore the TOC in r2 and the r2 TOC will + * get corrupted. + * + * Use a b +8 to jump over the load. + */ + if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) + pop = ppc_inst(PPC_RAW_NOP()); + else + pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */ + + if (patch_instruction((u32 *)ip, pop)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + + return 0; +} +#else +static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +static unsigned long find_ftrace_tramp(unsigned long ip) +{ + int i; + + /* + * We have the compiler generated long_branch tramps at the end + * and we prefer those + */ + for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) + if (!ftrace_tramps[i]) + continue; + else if (is_offset_in_branch_range(ftrace_tramps[i] - ip)) + return ftrace_tramps[i]; + + return 0; +} + +static int add_ftrace_tramp(unsigned long tramp) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) { + ftrace_tramps[i] = tramp; + return 0; + } + + return -1; +} + +/* + * If this is a compiler generated long_branch trampoline (essentially, a + * trampoline that has a branch to _mcount()), we re-write the branch to + * instead go to ftrace_[regs_]caller() and note down the location of this + * trampoline. + */ +static int setup_mcount_compiler_tramp(unsigned long tramp) +{ + int i; + ppc_inst_t op; + unsigned long ptr; + + /* Is this a known long jump tramp? */ + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (ftrace_tramps[i] == tramp) + return 0; + + /* New trampoline -- read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { + pr_debug("Fetching opcode failed.\n"); + return -1; + } + + /* Is this a 24 bit branch? 
*/
+	if (!is_b_op(op)) {
+		pr_debug("Trampoline is not a long branch tramp.\n");
+		return -1;
+	}
+
+	/* let's find where the pointer goes */
+	ptr = find_bl_target(tramp, op);
+
+	if (ptr != ppc_global_function_entry((void *)_mcount)) {
+		pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+		return -1;
+	}
+
+	/* Let's re-write the tramp to go to ftrace_[regs_]caller */
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+	else
+		ptr = ppc_global_function_entry((void *)ftrace_caller);
+
+	if (patch_branch((u32 *)tramp, ptr, 0)) {
+		pr_debug("REL24 out of range!\n");
+		return -1;
+	}
+
+	if (add_ftrace_tramp(tramp)) {
+		pr_debug("No tramp locations left\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long tramp, ip = rec->ip;
+	ppc_inst_t op;
+
+	/* Read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24-bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* Let's find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+
+	pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+	if (setup_mcount_compiler_tramp(tramp)) {
+		/* Are other trampolines reachable? */
+		if (!find_ftrace_tramp(ip)) {
+			pr_err("No ftrace trampolines reachable from %ps\n",
+			       (void *)ip);
+			return -EINVAL;
+		}
+	}
+
+	if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	ppc_inst_t old, new;
+
+	/*
+	 * If the calling address is more than 24 bits away,
+	 * then we had to use a trampoline to make the call.
+	 * Otherwise just update the call site.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		old = ftrace_call_replace(ip, addr, 1);
+		new = ppc_inst(PPC_RAW_NOP());
+		return ftrace_modify_code(ip, old, new);
+	} else if (core_kernel_text(ip)) {
+		return __ftrace_make_nop_kernel(rec, addr);
+	} else if (!IS_ENABLED(CONFIG_MODULES)) {
+		return -EINVAL;
+	}
+
+	return __ftrace_make_nop(mod, rec, addr);
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
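
The trampoline table above is a simple fixed array. A user-space sketch of the add/find pair, including the backwards scan that prefers the long_branch tramps registered last (range constants as in is_offset_in_branch_range()):

#include <stdio.h>

#define NUM_FTRACE_TRAMPS 8

static unsigned long tramps[NUM_FTRACE_TRAMPS];

static int add_tramp(unsigned long tramp)
{
	for (int i = 0; i < NUM_FTRACE_TRAMPS; i++)
		if (!tramps[i]) {
			tramps[i] = tramp;
			return 0;
		}
	return -1;
}

/* Walk backwards so the later-registered long_branch tramps win, and only
 * accept a slot the caller could actually reach with a direct branch. */
static unsigned long find_tramp(unsigned long ip)
{
	for (int i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) {
		long off = (long)(tramps[i] - ip);

		if (tramps[i] && off >= -0x2000000 && off <= 0x1fffffc)
			return tramps[i];
	}
	return 0;
}

int main(void)
{
	add_tramp(0x2000);    /* e.g. ftrace_tramp_text */
	add_tramp(0x4000000); /* e.g. a long_branch tramp, out of reach here */
	printf("%#lx\n", find_tramp(0x1000)); /* picks 0x2000 */
	return 0;
}
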
+ */ +static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) +{ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); + else + return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) && + ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC)); +} + +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + ppc_inst_t op[2]; + void *ip = (void *)rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(op, ip)) + return -EFAULT; + + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && + copy_inst_from_kernel_nofault(op + 1, ip + 4)) + return -EFAULT; + + if (!expected_nop_sequence(ip, op[0], op[1])) { + pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, + ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); + return -EINVAL; + } + + /* If we never set up ftrace trampoline(s), then bail */ + if (!mod->arch.tramp || + (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; +} +#else +static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) +{ + ppc_inst_t op; + void *ip = (void *)rec->ip; + unsigned long tramp, entry, ptr; + + /* Make sure we're being asked to patch branch to a known ftrace addr */ + entry = ppc_global_function_entry((void *)ftrace_caller); + ptr = ppc_global_function_entry((void *)addr); + + if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + entry = ppc_global_function_entry((void *)ftrace_regs_caller); + + if (ptr != entry) { + pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); + return -EINVAL; + } + + /* Make sure we have a nop */ + if (copy_inst_from_kernel_nofault(&op, ip)) { + pr_err("Unable to read ftrace location %p\n", ip); + return -EFAULT; + } + + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { + pr_err("Unexpected call sequence at %p: %08lx\n", + ip, ppc_inst_as_ulong(op)); + return -EINVAL; + } + + tramp = find_ftrace_tramp((unsigned long)ip); + if (!tramp) { + pr_err("No ftrace trampolines reachable from %ps\n", ip); + return -EINVAL; + } + + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { + pr_err("Error patching branch to ftrace tramp!\n"); + return -EINVAL; + } + + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. 
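
expected_nop_sequence() above encodes the two shapes a disabled call site can take. The sketch below assumes the ELFv1 TOC save slot at 40(r1), hence 0xe8410028 for the reload; treat the exact slot as illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PPC_NOP    0x60000000u /* ori r0,r0,0 */
#define B_PLUS_8   0x48000008u /* b .+8 */
#define LD_R2_40R1 0xe8410028u /* ld r2,40(r1) -- assumed ELFv1 TOC slot */

/* With DYNAMIC_FTRACE_WITH_REGS a disabled site is one plain nop; in the
 * older -pg scheme it is 'b +8' so the TOC reload after it is skipped. */
static bool expected_nop_sequence(bool with_regs, uint32_t op0, uint32_t op1)
{
	if (with_regs)
		return op0 == PPC_NOP;
	return op0 == B_PLUS_8 && op1 == LD_R2_40R1;
}

int main(void)
{
	printf("%d\n", expected_nop_sequence(false, B_PLUS_8, LD_R2_40R1));
	return 0;
}
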
+ */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + old = ppc_inst(PPC_RAW_NOP()); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + return __ftrace_make_call_kernel(rec, addr); + } else if (!IS_ENABLED(CONFIG_MODULES)) { + /* We should not get here without modules */ + return -EINVAL; + } + + return __ftrace_make_call(rec, addr); +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_MODULES +static int +__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + ppc_inst_t op; + unsigned long ip = rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; + + /* If we never set up ftrace trampolines, then bail */ + if (!mod->arch.tramp || !mod->arch.tramp_regs) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + entry = ppc_global_function_entry((void *)old_addr); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (tramp != entry) { + /* old_addr is not within range, so we must have used a trampoline */ + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + } + + /* The new target may be within range */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; + } + + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; +} +#else +static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) +{ + return 0; +} +#endif + +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. 
+ */ + if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { + /* within range */ + old = ftrace_call_replace(ip, old_addr, 1); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + /* + * We always patch out of range locations to go to the regs + * variant, so there is nothing to do here + */ + return 0; + } else if (!IS_ENABLED(CONFIG_MODULES)) { + /* We should not get here without modules */ + return -EINVAL; + } + + return __ftrace_modify_call(rec, old_addr, addr); +} +#endif + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + ppc_inst_t old, new; + int ret; + + old = ppc_inst_read((u32 *)&ftrace_call); + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + + /* Also update the regs callback function */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) { + ip = (unsigned long)(&ftrace_regs_call); + old = ppc_inst_read((u32 *)&ftrace_regs_call); + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + } + + return ret; +} + +/* + * Use the default ftrace_modify_all_code, but without + * stop_machine(). + */ +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + +#ifdef CONFIG_PPC64 +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; + +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + +int __init ftrace_dyn_arch_init(void) +{ + int i; + unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; + u32 stub_insns[] = { + PPC_RAW_LD(_R12, _R13, PACATOC), + PPC_RAW_ADDIS(_R12, _R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() + }; + unsigned long addr; + long reladdr; + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + addr = ppc_global_function_entry((void *)ftrace_regs_caller); + else + addr = ppc_global_function_entry((void *)ftrace_caller); + + reladdr = addr - kernel_toc_addr(); + + if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) { + pr_err("Address of %ps out of range of kernel_toc.\n", + (void *)addr); + return -1; + } + + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][1] |= PPC_HA(reladdr); + tramp[i][2] |= PPC_LO(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + + return 0; +} +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +extern void ftrace_graph_call(void); +extern void ftrace_graph_stub(void); + +static int ftrace_modify_ftrace_graph_caller(bool enable) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + unsigned long addr = (unsigned long)(&ftrace_graph_caller); + unsigned long stub = (unsigned long)(&ftrace_graph_stub); + ppc_inst_t old, new; + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) + return 0; + + old = ftrace_call_replace(ip, enable ? stub : addr, 0); + new = ftrace_call_replace(ip, enable ? addr : stub, 0); + + return ftrace_modify_code(ip, old, new); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(false); +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. 
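
The pg variant of ftrace_dyn_arch_init() above fills the addis/addi immediates of a stub template relative to the kernel TOC. A standalone sketch of that patching, and of why the check is +/-2 GiB (that is all a high-adjusted/low 16-bit pair can reach):

#include <stdint.h>
#include <stdio.h>

static int setup_stub(uint32_t stub[5], uint64_t addr, uint64_t toc)
{
	int64_t reladdr = (int64_t)(addr - toc);

	if (reladdr >= 0x80000000LL || reladdr < -0x80000000LL)
		return -1; /* not reachable from the TOC */
	stub[1] |= (uint16_t)((reladdr + 0x8000) >> 16); /* PPC_HA() */
	stub[2] |= (uint16_t)(reladdr & 0xffff);         /* PPC_LO() */
	return 0;
}

int main(void)
{
	/* placeholder words standing in for ld/addis/addi/mtctr/bctr */
	uint32_t stub[5] = { 0 };

	if (setup_stub(stub, 0xc000000001234560ULL, 0xc000000001000000ULL))
		return 1;
	printf("ha=%04x lo=%04x\n", stub[1], stub[2]);
	return 0;
}
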
Return the address we want to divert to.
+ */
+static unsigned long
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp,
+			struct ftrace_regs *fregs)
+{
+	unsigned long return_hooker;
+
+	if (unlikely(ftrace_graph_is_dead()))
+		goto out;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		goto out;
+
+	return_hooker = ppc_function_entry(return_to_handler);
+
+	if (!function_graph_enter_regs(parent, ip, 0, (unsigned long *)sp, fregs))
+		parent = return_hooker;
+
+out:
+	return parent;
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip,
+					arch_ftrace_regs(fregs)->regs.gpr[1], fregs);
+}
+#else
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+				    unsigned long sp)
+{
+	return __prepare_ftrace_return(parent, ip, sp, NULL);
+}
+#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+char *arch_ftrace_match_adjust(char *str, const char *search)
+{
+	if (str[0] == '.' && search[0] != '.')
+		return str + 1;
+	else
+		return str;
+}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
index 6708e24db0ab..a8a7f28404c8 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
@@ -3,12 +3,12 @@
  * Split from ftrace_64.S
  */
 
+#include <linux/export.h>
 #include <linux/magic.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ftrace.h>
 #include <asm/ppc-opcode.h>
-#include <asm/export.h>
 
 _GLOBAL_TOC(ftrace_caller)
 	lbz	r3, PACA_FTRACE_ENABLED(r13)
@@ -65,3 +65,68 @@ _GLOBAL(ftrace_graph_caller)
 	addi	r1, r1, 112
 	blr
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+	.space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+	.space 32
+.popsection
+
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+	mflr	r12
+	mtctr	r12
+	mtlr	r0
+	bctr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+	/* need to save return values */
+#ifdef CONFIG_PPC64
+	std	r4, -32(r1)
+	std	r3, -24(r1)
+	/* save TOC */
+	std	r2, -16(r1)
+	std	r31, -8(r1)
+	mr	r31, r1
+	stdu	r1, -112(r1)
+
+	/*
+	 * We might be called from a module.
+	 * Switch to our TOC to run inside the core kernel.
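
The graph-tracer entry path above is, at heart, a return-address swap. A stripped-down user-space model of the idea (the kernel version also handles recursion, the pause counter, and the real per-task return stack):

#include <stdint.h>
#include <stdio.h>

static uintptr_t ret_stack[16]; /* stands in for the per-task return stack */
static int ret_depth;

static void return_to_handler(void) { }

/* Shape of __prepare_ftrace_return(): remember the real return address,
 * hand back the hook so the traced function "returns" into the tracer,
 * which later pops the real address and branches to it. */
static uintptr_t prepare_return(uintptr_t parent, uintptr_t ip)
{
	ret_stack[ret_depth++] = parent;
	return (uintptr_t)return_to_handler;
}

int main(void)
{
	uintptr_t lr = 0x10000f00; /* illustrative caller address */

	lr = prepare_return(lr, 0x10000a00);
	printf("diverted LR=%p saved=%#lx\n", (void *)lr,
	       (unsigned long)ret_stack[0]);
	return 0;
}
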
+ */ + LOAD_PACA_TOC() +#else + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) +#endif + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + +#ifdef CONFIG_PPC64 + ld r1, 0(r1) + ld r4, -32(r1) + ld r3, -24(r1) + ld r2, -16(r1) + ld r31, -8(r1) +#else + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 +#endif + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S new file mode 100644 index 000000000000..3565c67fc638 --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -0,0 +1,471 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Split from ftrace_64.S + */ + +#include <linux/export.h> +#include <linux/magic.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/ppc-opcode.h> +#include <asm/thread_info.h> +#include <asm/bug.h> +#include <asm/ptrace.h> + +/* + * + * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount() + * when ftrace is active. + * + * We arrive here after a function A calls function B, and we are the trace + * function for B. When we enter r1 points to A's stack frame, B has not yet + * had a chance to allocate one yet. + * + * Additionally r2 may point either to the TOC for A, or B, depending on + * whether B did a TOC setup sequence before calling us. + * + * On entry the LR points back to the _mcount() call site, and r0 holds the + * saved LR as it was on entry to B, ie. the original return address at the + * call site in A. + * + * Our job is to save the register state into a struct pt_regs (on the stack) + * and then arrange for the ftrace function to be called. + */ +.macro ftrace_regs_entry allregs + /* Create a minimal stack frame for representing B */ + PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) + + /* Create our stack frame + pt_regs */ + PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) + + .if \allregs == 1 + SAVE_GPRS(11, 12, r1) + .endif + + /* Get the _mcount() call site out of LR */ + mflr r11 + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Load the ftrace_op */ + PPC_LL r12, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + + /* Load direct_call from the ftrace_op */ + PPC_LL r12, FTRACE_OPS_DIRECT_CALL(r12) + PPC_LCMPI r12, 0 + .if \allregs == 1 + bne .Lftrace_direct_call_regs + .else + bne .Lftrace_direct_call + .endif +#endif + + /* Save the previous LR in pt_regs->link */ + PPC_STL r0, _LINK(r1) + /* Also save it in A's stack frame */ + PPC_STL r0, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE+LRSAVE(r1) + + /* Save all gprs to pt_regs */ + SAVE_GPR(0, r1) + SAVE_GPRS(3, 10, r1) + +#ifdef CONFIG_PPC64 + /* Ok to continue? 
*/ + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beq ftrace_no_trace +#endif + + .if \allregs == 1 + SAVE_GPR(2, r1) + SAVE_GPRS(13, 31, r1) + .else +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + SAVE_GPR(14, r1) +#endif + .endif + + /* Save previous stack pointer (r1) */ + addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + PPC_STL r8, GPR1(r1) + + .if \allregs == 1 + /* Load special regs for save below */ + mfcr r7 + mfmsr r8 + mfctr r9 + mfxer r10 + .else + /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ + li r8, 0 + .endif + +#ifdef CONFIG_PPC64 + /* Save callee's TOC in the ABI compliant location */ + std r2, STK_GOT(r1) + LOAD_PACA_TOC() /* get kernel TOC in r2 */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* r11 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + PPC_LL r12, FTRACE_OPS_FUNC(r5) + mtctr r12 +#else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ +#ifdef CONFIG_PPC64 + LOAD_REG_ADDR(r3, function_trace_op) + ld r5,0(r3) +#else + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) +#endif +#endif + + /* Save special regs */ + PPC_STL r8, _MSR(r1) + .if \allregs == 1 + PPC_STL r7, _CCR(r1) + PPC_STL r9, _CTR(r1) + PPC_STL r10, _XER(r1) + .endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Clear orig_gpr3 to later detect ftrace_direct call */ + li r7, 0 + PPC_STL r7, ORIG_GPR3(r1) +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Save our real return address in nvr for return */ + .if \allregs == 0 + SAVE_GPR(15, r1) + .endif + mr r15, r11 + /* + * We want the ftrace location in the function, but our lr (in r11) + * points at the 'mtlr r0' instruction in the out of line stub. To + * recover the ftrace location, we read the branch instruction in the + * stub, and adjust our lr by the branch offset. + * + * See ftrace_init_ool_stub() for the profile sequence. + */ + lwz r8, MCOUNT_INSN_SIZE(r11) + slwi r8, r8, 6 + srawi r8, r8, 6 + add r3, r11, r8 + /* + * Override our nip to point past the branch in the original function. + * This allows reliable stack trace and the ftrace stack tracer to work as-is. + */ + addi r11, r3, MCOUNT_INSN_SIZE +#else + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r11, MCOUNT_INSN_SIZE +#endif + + /* Save NIP as pt_regs->nip */ + PPC_STL r11, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r11, LRSAVE+SWITCH_FRAME_SIZE(r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + mr r14, r11 /* remember old NIP */ +#endif + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Load &pt_regs in r6 for call below */ + addi r6, r1, STACK_INT_FRAME_REGS +.endm + +.macro ftrace_regs_exit allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Check orig_gpr3 to detect ftrace_direct call */ + PPC_LL r3, ORIG_GPR3(r1) + PPC_LCMPI cr1, r3, 0 + mtctr r3 +#endif + + /* Restore possibly modified LR */ + PPC_LL r0, _LINK(r1) + +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Load ctr with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) +#ifdef CONFIG_LIVEPATCH_64 + cmpd r14, r3 /* has NIP been altered? */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + beq cr1,2f + mtlr r3 + b 3f +#endif +2: mtctr r3 + mtlr r0 +3: + +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + /* Load LR with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) + cmpd r14, r3 /* has NIP been altered? 
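
The slwi/srawi pair above recovers the signed 26-bit displacement of the branch stored in the out-of-line stub. The same sign extension expressed in C, on an assumed 'b .-0x100' word (AA/LK are zero for a plain 'b', so the low bits do not disturb the result):

#include <stdint.h>
#include <stdio.h>

/* 'slwi r8,r8,6; srawi r8,r8,6': shift left then arithmetically right by
 * 6 to sign-extend the low 26 bits holding the I-form displacement. */
static int32_t branch_disp(uint32_t insn)
{
	return ((int32_t)(insn << 6)) >> 6;
}

int main(void)
{
	uint32_t b_back = 0x4bffff00u; /* b .-0x100 inside the OOL stub */

	printf("displacement = %d\n", branch_disp(b_back)); /* -256 */
	return 0;
}
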
*/ + bne- 1f + + mr r3, r15 +1: mtlr r3 + .if \allregs == 0 + REST_GPR(15, r1) + .endif +#endif + + /* Restore gprs */ + .if \allregs == 1 + REST_GPRS(2, 31, r1) + .else + REST_GPRS(3, 10, r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + REST_GPR(14, r1) +#endif + .endif + +#ifdef CONFIG_PPC64 + /* Restore callee's TOC */ + ld r2, STK_GOT(r1) +#endif + + /* Pop our stack frame */ + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + +#ifdef CONFIG_LIVEPATCH_64 + /* Based on the cmpd above, if the NIP was altered handle livepatch */ + bne- livepatch_handler +#endif + + /* jump after _mcount site */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnectr cr1 +#endif + /* + * Return with blr to keep the link stack balanced. The function profiling sequence + * uses 'mtlr r0' to restore LR. + */ + blr +#else + bctr +#endif +.endm + +.macro ftrace_regs_func allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + bctrl +#else + .if \allregs == 1 +.globl ftrace_regs_call +ftrace_regs_call: + .else +.globl ftrace_call +ftrace_call: + .endif + /* ftrace_call(r3, r4, r5, r6) */ + bl ftrace_stub +#endif +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + ftrace_regs_func 1 + ftrace_regs_exit 1 + +_GLOBAL(ftrace_caller) + ftrace_regs_entry 0 + ftrace_regs_func 0 + ftrace_regs_exit 0 + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_PPC64 +ftrace_no_trace: +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + blr +#else + mflr r3 + mtctr r3 + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + mtlr r0 + bctr +#endif +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +.Lftrace_direct_call_regs: + mtctr r12 + REST_GPRS(11, 12, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +.Lftrace_direct_call: + mtctr r12 + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +SYM_FUNC_START(ftrace_stub_direct_tramp) + blr +SYM_FUNC_END(ftrace_stub_direct_tramp) +#endif + +#ifdef CONFIG_LIVEPATCH_64 + /* + * This function runs in the mcount context, between two functions. As + * such it can only clobber registers which are volatile and used in + * function linkage. + * + * We get here when a function A, calls another function B, but B has + * been live patched with a new function C. + * + * On entry, we have no stack frame and can not allocate one. + * + * With PPC_FTRACE_OUT_OF_LINE=n, on entry: + * - LR points back to the original caller (in A) + * - CTR holds the new NIP in C + * - r0, r11 & r12 are free + * + * With PPC_FTRACE_OUT_OF_LINE=y, on entry: + * - r0 points back to the original caller (in A) + * - LR holds the new NIP in C + * - r11 & r12 are free + */ +livepatch_handler: + ld r12, PACA_THREAD_INFO(r13) + + /* Allocate 3 x 8 bytes */ + ld r11, TI_livepatch_sp(r12) + addi r11, r11, 24 + std r11, TI_livepatch_sp(r12) + + /* Store stack end marker */ + lis r12, STACK_END_MAGIC@h + ori r12, r12, STACK_END_MAGIC@l + std r12, -8(r11) + + /* Save toc & real LR on livepatch stack */ + std r2, -24(r11) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + mflr r12 + std r12, -16(r11) + mfctr r12 +#else + std r0, -16(r11) + mflr r12 + /* Put ctr in r12 for global entry and branch there */ + mtctr r12 +#endif + bctrl + + /* + * Now we are returning from the patched function to the original + * caller A. We are free to use r11, r12 and we can use r2 until we + * restore it. 
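
livepatch_handler above keeps a small grows-up shadow stack of 24-byte frames per thread. A user-space model of the push/pop and of the STACK_END_MAGIC (0x57AC6E9D) sanity trap; the array replaces the TI_livepatch_sp bookkeeping:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define STACK_END_MAGIC 0x57AC6E9DUL

static uint64_t lp_stack[64]; /* per-thread shadow stack, grows up */
static size_t lp_sp;

/* Three slots per frame: toc, saved lr, end marker */
static void lp_push(uint64_t toc, uint64_t lr)
{
	lp_stack[lp_sp++] = toc;
	lp_stack[lp_sp++] = lr;
	lp_stack[lp_sp++] = STACK_END_MAGIC;
}

static uint64_t lp_pop(uint64_t *toc)
{
	if (lp_stack[--lp_sp] != STACK_END_MAGIC)
		abort(); /* marker trashed: equivalent of the tdne trap */
	uint64_t lr = lp_stack[--lp_sp];
	*toc = lp_stack[--lp_sp];
	return lr;
}

int main(void)
{
	uint64_t toc, lr;

	lp_push(0x1000, 0x2000); /* entering the patched function */
	lr = lp_pop(&toc);       /* returning to the original caller */
	printf("lr=%#llx toc=%#llx\n",
	       (unsigned long long)lr, (unsigned long long)toc);
	return 0;
}
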
+ */ + + ld r12, PACA_THREAD_INFO(r13) + + ld r11, TI_livepatch_sp(r12) + + /* Check stack marker hasn't been trashed */ + lis r2, STACK_END_MAGIC@h + ori r2, r2, STACK_END_MAGIC@l + ld r12, -8(r11) +1: tdne r12, r2 + EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 + + /* Restore LR & toc from livepatch stack */ + ld r12, -16(r11) + mtlr r12 + ld r2, -24(r11) + + /* Pop livepatch stack frame */ + ld r12, PACA_THREAD_INFO(r13) + subi r11, r11, 24 + std r11, TI_livepatch_sp(r12) + + /* Return to original caller of live patched function */ + blr +#endif /* CONFIG_LIVEPATCH */ + +#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY +_GLOBAL(mcount) +_GLOBAL(_mcount) +EXPORT_SYMBOL(_mcount) + mflr r12 + mtctr r12 + mtlr r0 + bctr +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(return_to_handler) + /* need to save return values */ +#ifdef CONFIG_PPC64 + std r4, -32(r1) + std r3, -24(r1) + /* save TOC */ + std r2, -16(r1) + std r31, -8(r1) + mr r31, r1 + stdu r1, -112(r1) + + /* + * We might be called from a module. + * Switch to our TOC to run inside the core kernel. + */ + LOAD_PACA_TOC() +#else + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) +#endif + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + +#ifdef CONFIG_PPC64 + ld r1, 0(r1) + ld r4, -32(r1) + ld r3, -24(r1) + ld r2, -16(r1) + ld r31, -8(r1) +#else + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 +#endif + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) + +SYM_START(ftrace_ool_stub_text, SYM_L_GLOBAL, .balign SZL) + .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text) +#endif + +.pushsection ".tramp.ftrace.text","aw",@progbits; +.globl ftrace_tramp_text +ftrace_tramp_text: + .space 32 +.popsection + +.pushsection ".tramp.ftrace.init","aw",@progbits; +.globl ftrace_tramp_init +ftrace_tramp_init: + .space 32 +.popsection diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S deleted file mode 100644 index 0bddf1fa6636..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from entry_64.S - */ - -#include <linux/magic.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> -#include <asm/ppc-opcode.h> -#include <asm/export.h> - -#ifdef CONFIG_PPC64 -.pushsection ".tramp.ftrace.text","aw",@progbits; -.globl ftrace_tramp_text -ftrace_tramp_text: - .space 64 -.popsection - -.pushsection ".tramp.ftrace.init","aw",@progbits; -.globl ftrace_tramp_init -ftrace_tramp_init: - .space 64 -.popsection -#endif - -_GLOBAL(mcount) -_GLOBAL(_mcount) -EXPORT_SYMBOL(_mcount) - mflr r12 - mtctr r12 - mtlr r0 - bctr - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(return_to_handler) - /* need to save return values */ -#ifdef CONFIG_PPC64 - std r4, -32(r1) - std r3, -24(r1) - /* save TOC */ - std r2, -16(r1) - std r31, -8(r1) - mr r31, r1 - stdu r1, -112(r1) - - /* - * We might be called from a module. - * Switch to our TOC to run inside the core kernel. 
- */ - ld r2, PACATOC(r13) -#else - stwu r1, -16(r1) - stw r3, 8(r1) - stw r4, 12(r1) -#endif - - bl ftrace_return_to_handler - nop - - /* return value has real return address */ - mtlr r3 - -#ifdef CONFIG_PPC64 - ld r1, 0(r1) - ld r4, -32(r1) - ld r3, -24(r1) - ld r2, -16(r1) - ld r31, -8(r1) -#else - lwz r3, 8(r1) - lwz r4, 12(r1) - addi r1, r1, 16 -#endif - - /* Jump back to real return address */ - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S deleted file mode 100644 index 4fa23e260cab..000000000000 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ /dev/null @@ -1,253 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from ftrace_64.S - */ - -#include <linux/magic.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ftrace.h> -#include <asm/ppc-opcode.h> -#include <asm/export.h> -#include <asm/thread_info.h> -#include <asm/bug.h> -#include <asm/ptrace.h> - -/* - * - * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount() - * when ftrace is active. - * - * We arrive here after a function A calls function B, and we are the trace - * function for B. When we enter r1 points to A's stack frame, B has not yet - * had a chance to allocate one yet. - * - * Additionally r2 may point either to the TOC for A, or B, depending on - * whether B did a TOC setup sequence before calling us. - * - * On entry the LR points back to the _mcount() call site, and r0 holds the - * saved LR as it was on entry to B, ie. the original return address at the - * call site in A. - * - * Our job is to save the register state into a struct pt_regs (on the stack) - * and then arrange for the ftrace function to be called. - */ -.macro ftrace_regs_entry allregs - /* Create our stack frame + pt_regs */ - PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) - - /* Save all gprs to pt_regs */ - SAVE_GPR(0, r1) - SAVE_GPRS(3, 10, r1) - -#ifdef CONFIG_PPC64 - /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) - /* Ok to continue? 
*/ - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beq ftrace_no_trace -#endif - - .if \allregs == 1 - SAVE_GPR(2, r1) - SAVE_GPRS(11, 31, r1) - .else -#ifdef CONFIG_LIVEPATCH_64 - SAVE_GPR(14, r1) -#endif - .endif - - /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE - PPC_STL r8, GPR1(r1) - - .if \allregs == 1 - /* Load special regs for save below */ - mfmsr r8 - mfctr r9 - mfxer r10 - mfcr r11 - .else - /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ - li r8, 0 - .endif - - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - PPC_STL r7, _NIP(r1) - /* Save the read LR in pt_regs->link */ - PPC_STL r0, _LINK(r1) - -#ifdef CONFIG_PPC64 - /* Save callee's TOC in the ABI compliant location */ - std r2, STK_GOT(r1) - ld r2,PACATOC(r13) /* get kernel TOC in r2 */ - - addis r3,r2,function_trace_op@toc@ha - addi r3,r3,function_trace_op@toc@l - ld r5,0(r3) -#else - lis r3,function_trace_op@ha - lwz r5,function_trace_op@l(r3) -#endif - -#ifdef CONFIG_LIVEPATCH_64 - mr r14, r7 /* remember old NIP */ -#endif - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - - /* Save special regs */ - PPC_STL r8, _MSR(r1) - .if \allregs == 1 - PPC_STL r9, _CTR(r1) - PPC_STL r10, _XER(r1) - PPC_STL r11, _CCR(r1) - .endif - - /* Load &pt_regs in r6 for call below */ - addi r6, r1, STACK_FRAME_OVERHEAD -.endm - -.macro ftrace_regs_exit allregs - /* Load ctr with the possibly modified NIP */ - PPC_LL r3, _NIP(r1) - mtctr r3 - -#ifdef CONFIG_LIVEPATCH_64 - cmpd r14, r3 /* has NIP been altered? */ -#endif - - /* Restore gprs */ - .if \allregs == 1 - REST_GPRS(2, 31, r1) - .else - REST_GPRS(3, 10, r1) -#ifdef CONFIG_LIVEPATCH_64 - REST_GPR(14, r1) -#endif - .endif - - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) - mtlr r0 - -#ifdef CONFIG_PPC64 - /* Restore callee's TOC */ - ld r2, STK_GOT(r1) -#endif - - /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE - -#ifdef CONFIG_LIVEPATCH_64 - /* Based on the cmpd above, if the NIP was altered handle livepatch */ - bne- livepatch_handler -#endif - bctr /* jump after _mcount site */ -.endm - -_GLOBAL(ftrace_regs_caller) - ftrace_regs_entry 1 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_regs_call -ftrace_regs_call: - bl ftrace_stub - nop - ftrace_regs_exit 1 - -_GLOBAL(ftrace_caller) - ftrace_regs_entry 0 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop - ftrace_regs_exit 0 - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_PPC64 -ftrace_no_trace: - mflr r3 - mtctr r3 - REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE - mtlr r0 - bctr -#endif - -#ifdef CONFIG_LIVEPATCH_64 - /* - * This function runs in the mcount context, between two functions. As - * such it can only clobber registers which are volatile and used in - * function linkage. - * - * We get here when a function A, calls another function B, but B has - * been live patched with a new function C. 
- * - * On entry: - * - we have no stack frame and can not allocate one - * - LR points back to the original caller (in A) - * - CTR holds the new NIP in C - * - r0, r11 & r12 are free - */ -livepatch_handler: - ld r12, PACA_THREAD_INFO(r13) - - /* Allocate 3 x 8 bytes */ - ld r11, TI_livepatch_sp(r12) - addi r11, r11, 24 - std r11, TI_livepatch_sp(r12) - - /* Save toc & real LR on livepatch stack */ - std r2, -24(r11) - mflr r12 - std r12, -16(r11) - - /* Store stack end marker */ - lis r12, STACK_END_MAGIC@h - ori r12, r12, STACK_END_MAGIC@l - std r12, -8(r11) - - /* Put ctr in r12 for global entry and branch there */ - mfctr r12 - bctrl - - /* - * Now we are returning from the patched function to the original - * caller A. We are free to use r11, r12 and we can use r2 until we - * restore it. - */ - - ld r12, PACA_THREAD_INFO(r13) - - ld r11, TI_livepatch_sp(r12) - - /* Check stack marker hasn't been trashed */ - lis r2, STACK_END_MAGIC@h - ori r2, r2, STACK_END_MAGIC@l - ld r12, -8(r11) -1: tdne r12, r2 - EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0 - - /* Restore LR & toc from livepatch stack */ - ld r12, -16(r11) - mtlr r12 - ld r2, -24(r11) - - /* Pop livepatch stack frame */ - ld r12, PACA_THREAD_INFO(r13) - subi r11, r11, 24 - std r11, TI_livepatch_sp(r12) - - /* Return to original caller of live patched function */ - blr -#endif /* CONFIG_LIVEPATCH */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3aaa50e5c72f..cb8e9357383e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -68,6 +68,7 @@ #include <asm/stacktrace.h> #include <asm/nmi.h> #include <asm/disassemble.h> +#include <asm/udbg.h> #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE) int (*__debugger)(struct pt_regs *regs) __read_mostly; @@ -120,7 +121,7 @@ static void pmac_backlight_unblank(void) props = &pmac_backlight->props; props->brightness = props->max_brightness; - props->power = FB_BLANK_UNBLANK; + props->power = BACKLIGHT_POWER_ON; backlight_update_status(pmac_backlight); } mutex_unlock(&pmac_backlight_mutex); @@ -156,7 +157,7 @@ static int die_owner = -1; static unsigned int die_nest_count; static int die_counter; -extern void panic_flush_kmsg_start(void) +void panic_flush_kmsg_start(void) { /* * These are mostly taken from kernel/panic.c, but tries to do @@ -169,7 +170,7 @@ extern void panic_flush_kmsg_start(void) bust_spinlocks(1); } -extern void panic_flush_kmsg_end(void) +void panic_flush_kmsg_end(void) { kmsg_dump(KMSG_DUMP_PANIC); bust_spinlocks(0); @@ -262,10 +263,9 @@ static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + printk("%s PAGE_SIZE=%luK%s %s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", PAGE_SIZE / 1024, get_mmu_str(), - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", @@ -403,7 +403,7 @@ noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; if (!(regs->msr & MSR_HV)) return; - if (regs->msr & MSR_PR) + if (user_mode(regs)) return; /* @@ -600,7 +600,7 @@ static inline int check_io_access(struct pt_regs *regs) #define inst_length(reason) (((reason) & REASON_PREFIXED) ? 
8 : 4) -#if defined(CONFIG_E500) +#if defined(CONFIG_PPC_E500) int machine_check_e500mc(struct pt_regs *regs) { unsigned long mcsr = mfspr(SPRN_MCSR); @@ -850,6 +850,19 @@ bail: } #ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER_RAW(machine_check_early_boot) +{ + udbg_printf("Machine check (early boot)\n"); + udbg_printf("SRR0=0x%016lx SRR1=0x%016lx\n", regs->nip, regs->msr); + udbg_printf(" DAR=0x%016lx DSISR=0x%08lx\n", regs->dar, regs->dsisr); + udbg_printf(" LR=0x%016lx R1=0x%08lx\n", regs->link, regs->gpr[1]); + udbg_printf("------\n"); + die("Machine check (early boot)", regs, SIGBUS); + for (;;) + ; + return 0; +} + DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async) { __machine_check_exception(regs); @@ -1144,12 +1157,13 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) * pretend we got a single-step exception. This was pointed out * by Kumar Gala. -- paulus */ -static void emulate_single_step(struct pt_regs *regs) +void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) __single_step_exception(regs); } +#ifdef CONFIG_PPC_FPU_REGS static inline int __parse_fpscr(unsigned long fpscr) { int ret = FPE_FLTUNK; @@ -1176,6 +1190,7 @@ static inline int __parse_fpscr(unsigned long fpscr) return ret; } +#endif static void parse_fpe(struct pt_regs *regs) { @@ -1423,10 +1438,12 @@ static int emulate_instruction(struct pt_regs *regs) return -EINVAL; } +#ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long addr) { return is_kernel_addr(addr); } +#endif #ifdef CONFIG_MATH_EMULATION static int emulate_math(struct pt_regs *regs) @@ -1492,18 +1509,17 @@ static void do_program_check(struct pt_regs *regs) if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR)) bugaddr += PAGE_OFFSET; - if (!(regs->msr & MSR_PR) && /* not user-mode */ + if (!user_mode(regs) && report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { - const struct exception_table_entry *entry; + regs_add_return_ip(regs, 4); + return; + } - entry = search_exception_tables(bugaddr); - if (entry) { - regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr); - return; - } + /* User mode considers other cases after enabling IRQs */ + if (!user_mode(regs)) { + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; } - _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - return; } #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (reason & REASON_TM) { @@ -1536,16 +1552,44 @@ static void do_program_check(struct pt_regs *regs) /* * If we took the program check in the kernel skip down to sending a - * SIGILL. The subsequent cases all relate to emulating instructions - * which we should only do for userspace. We also do not want to enable - * interrupts for kernel faults because that might lead to further - * faults, and loose the context of the original exception. + * SIGILL. The subsequent cases all relate to user space, such as + * emulating instructions which we should only do for user space. We + * also do not want to enable interrupts for kernel faults because that + * might lead to further faults, and loose the context of the original + * exception. */ if (!user_mode(regs)) goto sigill; interrupt_cond_local_irq_enable(regs); + /* + * (reason & REASON_TRAP) is mostly handled before enabling IRQs, + * except get_user_instr() can sleep so we cannot reliably inspect the + * current instruction in that context. Now that we know we are + * handling a user space trap and can sleep, we can check if the trap + * was a hashchk failure. 
+ */ + if (reason & REASON_TRAP) { + if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) { + ppc_inst_t insn; + + if (get_user_instr(insn, (void __user *)regs->nip)) { + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + + if (ppc_inst_primary_opcode(insn) == 31 && + get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) { + _exception(SIGILL, regs, ILL_ILLOPN, regs->nip); + return; + } + } + + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); + return; + } + /* (reason & REASON_ILLEGAL) would be the obvious thing here, * but there seems to be a hardware bug on the 405GP (RevD) * that means ESR is sometimes set incorrectly - either to @@ -1676,7 +1720,7 @@ DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT); } -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 static void tm_unavailable(struct pt_regs *regs) { #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -2085,7 +2129,7 @@ DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_85xx DEFINE_INTERRUPT_HANDLER(CacheLockingException) { unsigned long error_code = regs->dsisr; @@ -2098,12 +2142,11 @@ DEFINE_INTERRUPT_HANDLER(CacheLockingException) _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); return; } -#endif /* CONFIG_FSL_BOOKE */ +#endif /* CONFIG_PPC_85xx */ #ifdef CONFIG_SPE DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) { - extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; int fpexc_mode; int code = FPE_FLTUNK; @@ -2153,7 +2196,6 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) { - extern int speround_handler(struct pt_regs *regs); int err; interrupt_cond_local_irq_enable(regs); @@ -2201,22 +2243,11 @@ void __noreturn unrecoverable_exception(struct pt_regs *regs) ; } -#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) -/* - * Default handler for a Watchdog exception, - * spins until a reboot occurs - */ -void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) -{ - /* Generic WatchdogHandler, implement your own */ - mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); - return; -} - +#ifdef CONFIG_BOOKE_WDT DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException) { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); - WatchdogHandler(regs); + mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE); return 0; } #endif diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S index 07296bc39166..80a1f9a4300a 100644 --- a/arch/powerpc/kernel/ucall.S +++ b/arch/powerpc/kernel/ucall.S @@ -5,8 +5,8 @@ * Copyright 2019, IBM Corporation. 
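The folded-in Book-E watchdog handler above is just a read-modify-write that clears the watchdog interrupt enable bit so the exception cannot refire. A standalone sketch of that pattern, with an assumed TCR_WIE bit value and shadow variables standing in for mfspr()/mtspr():

#include <stdint.h>

#define TCR_WIE 0x08000000u			/* assumed bit position, illustration only */

static uint32_t tcr_shadow = 0x88000000u;	/* stand-in for the real SPR */

static uint32_t mfspr_tcr(void) { return tcr_shadow; }
static void mtspr_tcr(uint32_t v) { tcr_shadow = v; }

void watchdog_quiesce(void)
{
	/* equivalent of: mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE); */
	mtspr_tcr(mfspr_tcr() & ~TCR_WIE);
}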
* */ +#include <linux/export.h> #include <asm/ppc_asm.h> -#include <asm/export.h> _GLOBAL(ucall_norets) EXPORT_SYMBOL_GPL(ucall_norets) diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index b1544b2f6321..862b22b2b616 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -36,12 +36,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_PANEL) /* RTAS panel debug */ udbg_init_rtas_panel(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE) - /* RTAS console debug */ - udbg_init_rtas_console(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) - /* Maple real mode debug */ - udbg_init_maple_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) @@ -49,9 +43,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) /* PPC44x debug */ udbg_init_44x_as1(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_40x) - /* PPC40x debug */ - udbg_init_40x_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_CPM) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) @@ -67,6 +58,8 @@ void __init udbg_early_init(void) udbg_init_debug_opal_raw(); #elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI) udbg_init_debug_opal_hvsi(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_16550) + udbg_init_debug_16550(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index d3942de254c6..dfe8ed2192e8 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -7,7 +7,7 @@ #include <linux/types.h> #include <asm/udbg.h> #include <asm/io.h> -#include <asm/reg_a2.h> +#include <asm/early_ioremap.h> extern u8 real_readb(volatile u8 __iomem *addr); extern void real_writeb(u8 data, volatile u8 __iomem *addr); @@ -205,29 +205,6 @@ void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) udbg_use_uart(); } -#ifdef CONFIG_PPC_MAPLE - -#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8) - -static u8 udbg_uart_in_maple(unsigned int reg) -{ - return real_readb(UDBG_UART_MAPLE_ADDR + reg); -} - -static void udbg_uart_out_maple(unsigned int reg, u8 val) -{ - real_writeb(val, UDBG_UART_MAPLE_ADDR + reg); -} - -void __init udbg_init_maple_realmode(void) -{ - udbg_uart_in = udbg_uart_in_maple; - udbg_uart_out = udbg_uart_out_maple; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_MAPLE */ - #ifdef CONFIG_PPC_PASEMI #define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL) @@ -274,64 +251,34 @@ void __init udbg_init_44x_as1(void) #endif /* CONFIG_PPC_EARLY_DEBUG_44x */ -#ifdef CONFIG_PPC_EARLY_DEBUG_40x +#ifdef CONFIG_PPC_EARLY_DEBUG_16550 -static u8 udbg_uart_in_40x(unsigned int reg) -{ - return real_readb((void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR - + reg); -} +static void __iomem *udbg_uart_early_addr; -static void udbg_uart_out_40x(unsigned int reg, u8 val) +void __init udbg_init_debug_16550(void) { - real_writeb(val, (void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR - + reg); + udbg_uart_early_addr = early_ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000); + udbg_uart_init_mmio(udbg_uart_early_addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE); } -void __init udbg_init_40x_realmode(void) +static int __init udbg_init_debug_16550_ioremap(void) { - udbg_uart_in = udbg_uart_in_40x; - udbg_uart_out = udbg_uart_out_40x; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_40x */ + void __iomem *addr; -#ifdef CONFIG_PPC_EARLY_DEBUG_MICROWATT + if (!udbg_uart_early_addr) + 
return 0; -#define UDBG_UART_MW_ADDR ((void __iomem *)0xc0002000) + addr = ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000); + if (WARN_ON(!addr)) + return -ENOMEM; -static u8 udbg_uart_in_isa300_rm(unsigned int reg) -{ - uint64_t msr = mfmsr(); - uint8_t c; - - mtmsr(msr & ~(MSR_EE|MSR_DR)); - isync(); - eieio(); - c = __raw_rm_readb(UDBG_UART_MW_ADDR + (reg << 2)); - mtmsr(msr); - isync(); - return c; -} + udbg_uart_init_mmio(addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE); + early_iounmap(udbg_uart_early_addr, 0x1000); + udbg_uart_early_addr = NULL; -static void udbg_uart_out_isa300_rm(unsigned int reg, u8 val) -{ - uint64_t msr = mfmsr(); - - mtmsr(msr & ~(MSR_EE|MSR_DR)); - isync(); - eieio(); - __raw_rm_writeb(val, UDBG_UART_MW_ADDR + (reg << 2)); - mtmsr(msr); - isync(); + return 0; } -void __init udbg_init_debug_microwatt(void) -{ - udbg_uart_in = udbg_uart_in_isa300_rm; - udbg_uart_out = udbg_uart_out_isa300_rm; - udbg_use_uart(); -} +early_initcall(udbg_init_debug_16550_ioremap); -#endif /* CONFIG_PPC_EARLY_DEBUG_MICROWATT */ +#endif /* CONFIG_PPC_EARLY_DEBUG_16550 */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 0da287544054..219d67bcf747 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -16,9 +16,8 @@ #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> -#include <linux/memblock.h> #include <linux/syscalls.h> -#include <linux/time_namespace.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/syscall.h> @@ -33,28 +32,15 @@ #include <asm/vdso_datapage.h> #include <asm/setup.h> +static_assert(__VDSO_PAGES == VDSO_NR_PAGES); + /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) extern char vdso32_start, vdso32_end; extern char vdso64_start, vdso64_end; -/* - * The vdso data page (aka. systemcfg for old ppc64 fans) is here. - * Once the early boot kernel code no longer needs to muck around - * with it, it will become dynamically allocated - */ -static union { - struct vdso_arch_data data; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_arch_data *vdso_data = &vdso_data_store.data; - -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; +long sys_ni_syscall(void); static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) @@ -79,112 +65,33 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); } -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf); +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; -static struct vm_special_mapping vvar_spec __ro_after_init = { - .name = "[vvar]", - .fault = vvar_fault, -}; + /* + * close() is called for munmap() but also for mremap(). In the mremap() + * case the vdso pointer has already been updated by the mremap() hook + * above, so it must not be set to NULL here. 
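The udbg_init_debug_16550 flow above is a two-stage mapping handoff: at boot the UART is reached through early_ioremap(), then an early_initcall swaps in a permanent ioremap() and releases the early slot. A simplified sketch of that handoff logic, with stub mapping functions and addresses standing in for the real ioremap family:

#include <stdio.h>
#include <stddef.h>

static void *early_map(unsigned long pa) { return (void *)(0xf0000000UL + pa); }
static void *perm_map(unsigned long pa)  { return (void *)(0xc0000000UL + pa); }
static void early_unmap(void *va)        { (void)va; }

static void *uart_early_addr;	/* like udbg_uart_early_addr */

void init_early(unsigned long pa)
{
	uart_early_addr = early_map(pa);	/* console usable from here on */
}

int init_permanent(unsigned long pa)
{
	void *addr;

	if (!uart_early_addr)		/* early console was never set up */
		return 0;

	addr = perm_map(pa);		/* re-point output at the permanent mapping */
	if (!addr)
		return -1;

	early_unmap(uart_early_addr);	/* release the early slot */
	uart_early_addr = NULL;
	return 0;
}

int main(void)
{
	init_early(0x3f8);
	printf("handoff ok: %d\n", init_permanent(0x3f8));
	return 0;
}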
+ */ + if (vma->vm_start != (unsigned long)mm->context.vdso) + return; + + mm->context.vdso = NULL; +} static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso32_mremap, + .close = vdso_close, }; static struct vm_special_mapping vdso64_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso64_mremap, + .close = vdso_close, }; -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return ((struct vdso_arch_data *)vvar_page)->data; -} - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - - mmap_read_lock(mm); - - for (vma = mm->mmap; vma; vma = vma->vm_next) { - unsigned long size = vma->vm_end - vma->vm_start; - - if (vma_is_special_mapping(vma, &vvar_spec)) - zap_page_range(vma, vma->vm_start, size); - } - - mmap_read_unlock(mm); - return 0; -} - -static struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - if (likely(vma->vm_mm == current->mm)) - return current->nsproxy->time_ns->vvar_page; - - /* - * VM_PFNMAP | VM_IO protect .fault() handler from being called - * through interfaces like /proc/$pid/mem or - * process_vm_{readv,writev}() as long as there's no .access() - * in special_mapping_vmops. - * For more details check_vma_flags() and __access_remote_vm() - */ - WARN(1, "vvar_page accessed remotely"); - - return NULL; -} -#else -static struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - return NULL; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = virt_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -193,51 +100,33 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int { unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; - unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; + unsigned long vvar_size = VDSO_NR_PAGES * PAGE_SIZE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; if (is_32bit_task()) { vdso_spec = &vdso32_spec; vdso_size = &vdso32_end - &vdso32_start; - vdso_base = VDSO32_MBASE; } else { vdso_spec = &vdso64_spec; vdso_size = &vdso64_end - &vdso64_start; - /* - * On 64bit we don't have a preferred map address. This - * allows get_unmapped_area to find an area near other mmaps - * and most likely share a SLB entry. 
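The new close() hook above relies on a single comparison to tell munmap() from mremap(): only when the dying VMA still starts at the cached vDSO base is the pointer cleared. A type-simplified sketch of that guard:

#include <stdio.h>

struct mm_ctx { unsigned long vdso; };
struct vma { unsigned long vm_start; struct mm_ctx *ctx; };

static void vdso_close_sketch(struct vma *vma)
{
	/* mremap() already re-pointed ctx->vdso at the new range */
	if (vma->vm_start != vma->ctx->vdso)
		return;

	vma->ctx->vdso = 0;	/* genuine unmap: forget the base */
}

int main(void)
{
	struct mm_ctx ctx = { .vdso = 0x1000 };
	struct vma old = { .vm_start = 0x1000, .ctx = &ctx };

	vdso_close_sketch(&old);
	printf("after munmap: %#lx\n", ctx.vdso);
	return 0;
}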
- */ - vdso_base = 0; } mappings_size = vdso_size + vvar_size; mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK; /* - * pick a base address for the vDSO in process space. We try to put it - * at vdso_base which is the "natural" base for it, but we might fail - * and end up putting it elsewhere. + * Pick a base address for the vDSO in process space. * Add enough to the size so that the result can be aligned. */ - vdso_base = get_unmapped_area(NULL, vdso_base, mappings_size, 0, 0); + vdso_base = get_unmapped_area(NULL, 0, mappings_size, 0, 0); if (IS_ERR_VALUE(vdso_base)) return vdso_base; /* Add required alignment. */ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); - /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO. - */ - mm->context.vdso = (void __user *)vdso_base + vvar_size; - - vma = _install_special_mapping(mm, vdso_base, vvar_size, - VM_READ | VM_MAYREAD | VM_IO | - VM_DONTDUMP | VM_PFNMAP, &vvar_spec); + vma = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -254,10 +143,15 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, vdso_spec); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { do_munmap(mm, vdso_base, vvar_size, NULL); + return PTR_ERR(vma); + } - return PTR_ERR_OR_ZERO(vma); + // Now that the mappings are in place, set the mm VDSO pointer + mm->context.vdso = (void __user *)vdso_base + vvar_size; + + return 0; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -271,8 +165,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return -EINTR; rc = __arch_setup_additional_pages(bprm, uses_interp); - if (rc) - mm->context.vdso = NULL; mmap_write_unlock(mm); return rc; @@ -313,11 +205,11 @@ static void __init vdso_setup_syscall_map(void) unsigned int i; for (i = 0; i < NR_syscalls; i++) { - if (sys_call_table[i] != (unsigned long)&sys_ni_syscall) - vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + if (sys_call_table[i] != (void *)&sys_ni_syscall) + vdso_k_arch_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && - compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall) - vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + compat_sys_call_table[i] != (void *)&sys_ni_syscall) + vdso_k_arch_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } @@ -367,29 +259,10 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) static int __init vdso_init(void) { #ifdef CONFIG_PPC64 - /* - * Fill up the "systemcfg" stuff for backward compatibility - */ - strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); - vdso_data->version.major = SYSTEMCFG_MAJOR; - vdso_data->version.minor = SYSTEMCFG_MINOR; - vdso_data->processor = mfspr(SPRN_PVR); - /* - * Fake the old platform number for pSeries and add - * in LPAR bit if necessary - */ - vdso_data->platform = 0x100; - if (firmware_has_feature(FW_FEATURE_LPAR)) - vdso_data->platform |= 1; - vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.l1d.size; - vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; - vdso_data->icache_size = ppc64_caches.l1i.size; - vdso_data->icache_line_size = ppc64_caches.l1i.line_size; - 
vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; - vdso_data->icache_block_size = ppc64_caches.l1i.block_size; - vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; - vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; + vdso_k_arch_data->dcache_block_size = ppc64_caches.l1d.block_size; + vdso_k_arch_data->icache_block_size = ppc64_caches.l1i.block_size; + vdso_k_arch_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; + vdso_k_arch_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; #endif /* CONFIG_PPC64 */ vdso_setup_syscall_map(); diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 096b0bf1335f..8834dfe9d727 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -2,31 +2,27 @@ # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 -include $(srctree)/lib/vdso/Makefile +# Include the generic Makefile to check the built vdso. +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o +obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o +obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o + ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) - CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc - CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) # Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true # by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is # compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code # generation is minimal, it will just use r29 instead. 
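The bitmap fill in vdso_setup_syscall_map() earlier in vdso.c packs one bit per syscall, MSB-first within 32-bit words: entry i lives in word i >> 5 at mask 0x80000000 >> (i & 0x1f). A self-contained sketch of that indexing (the table size is illustrative):

#include <stdio.h>
#include <stdint.h>

#define NR_SYSCALLS 96	/* illustrative, not the real table size */

static uint32_t syscall_map[(NR_SYSCALLS + 31) / 32];

static void map_set(unsigned int i)
{
	syscall_map[i >> 5] |= 0x80000000u >> (i & 0x1f);
}

static int map_test(unsigned int i)
{
	return !!(syscall_map[i >> 5] & (0x80000000u >> (i & 0x1f)));
}

int main(void)
{
	map_set(0);
	map_set(37);
	printf("%d %d %d\n", map_test(0), map_test(37), map_test(38));
	return 0;
}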
- CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) $(call cc-option, -ffixed-r30) +endif + +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y) + CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) endif # Build rules @@ -37,51 +33,74 @@ else VDSOCC := $(CC) endif -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o +targets += crtsavres-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) -targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both -ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) - -CC32FLAGS := -Wl,-soname=linux-vdso32.so.1 -m32 -AS32FLAGS := -D__VDSO32__ -s +ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO +ccflags-y += $(DISABLE_LATENT_ENTROPY_PLUGIN) +ccflags-y += $(call cc-option, -fno-stack-protector) +ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -ffreestanding -fasynchronous-unwind-tables +ccflags-remove-y := $(CC_FLAGS_FTRACE) +ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS) +ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) +ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) + +# Filter flags that clang will warn are unused for linking +ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) + +CC32FLAGS := -m32 +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel +ifdef CONFIG_CC_IS_CLANG +# This flag is supported by clang for 64-bit but not 32-bit so it will cause +# an unused command line flag warning for this file. +CC32FLAGSREMOVE += -fno-stack-clash-protection +# -mstack-protector-guard values from the 64-bit build are not valid for the +# 32-bit one. clang validates the values passed to these arguments during +# parsing, even when -fno-stack-protector is passed afterwards. 
+CC32FLAGSREMOVE += -mstack-protector-guard% +endif +LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 +AS32FLAGS := -D__VDSO32__ -CC64FLAGS := -Wl,-soname=linux-vdso64.so.1 -AS64FLAGS := -D__VDSO64__ -s +LD64FLAGS := -Wl,-soname=linux-vdso64.so.1 +AS64FLAGS := -D__VDSO64__ targets += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -Upowerpc targets += vdso64.lds -CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE +$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE $(call if_changed,vdso32ld_and_check) -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files $(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/crtsavres-32.o: %-32.o: $(srctree)/arch/powerpc/lib/crtsavres.S FORCE + $(call if_changed_dep,vdso32as) $(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call if_changed_dep,vdso32cc) +$(obj)/vgetrandom-32.o: %-32.o: %.c FORCE + $(call if_changed_dep,vdso32cc) $(obj-vdso64): %-64.o: %.S FORCE $(call if_changed_dep,vdso64as) $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE $(call if_changed_dep,cc_o_c) +$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) # Generate VDSO offsets using helper script -gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +gen-vdso32sym := $(src)/gen_vdso32_offsets.sh quiet_cmd_vdso32sym = VDSO32SYM $@ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ -gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +gen-vdso64sym := $(src)/gen_vdso64_offsets.sh quiet_cmd_vdso64sym = VDSO64SYM $@ cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ @@ -92,13 +111,13 @@ include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE # actual build commands quiet_cmd_vdso32ld_and_check = VDSO32L $@ - cmd_vdso32ld_and_check = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) + cmd_vdso32ld_and_check = $(VDSOCC) $(ldflags-y) $(CC32FLAGS) $(LD32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + cmd_vdso32cc = $(VDSOCC) $(filter-out $(CC32FLAGSREMOVE), $(c_flags)) $(CC32FLAGS) -c -o $@ $< quiet_cmd_vdso64ld_and_check = VDSO64L $@ - cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) ; $(cmd_vdso_check) + cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ - cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $< + cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index d4e43ab2d5df..488d3ade11e6 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -30,7 +30,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10 + 
get_datapage r10 vdso_u_arch_data mtlr r12 .cfi_restore lr #endif
@@ -91,6 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 3: crclr cr0*4+so sync + icbi 0,r1 isync li r3,0 blr
diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S index db8e167f0166..d23b2e8e2a34 100644 --- a/arch/powerpc/kernel/vdso/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S
@@ -28,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. r4,r3 - get_datapage r3 + get_datapage r3 vdso_u_arch_data mtlr r12 #ifdef __powerpc64__ addi r3,r3,CFG_SYSCALL_MAP64
@@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3 + get_datapage r3 vdso_u_arch_data #ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) #endif
diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S new file mode 100644 index 000000000000..a80d9fb436f7 --- /dev/null +++ b/arch/powerpc/kernel/vdso/getrandom.S
@@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Userland implementation of getrandom() for processes + * for use in the vDSO + * + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +/* + * The macro sets two stack frames, one for the caller and one for the callee, + * because there is no requirement for the caller to set a stack frame when + * calling the VDSO, so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + mflr r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT +#endif + bl CFUNC(DOTSYM(\funct)) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_restore r2 +#endif + cmpwi r3, 0 + mtlr r0 + addi r1, r1, 2 * PPC_MIN_STKFRM + .cfi_restore lr + .cfi_def_cfa_offset 0 + crclr so + bgelr+ + crset so + neg r3, r3 + blr + .cfi_endproc +.endm + + .text +V_FUNCTION_BEGIN(__kernel_getrandom) + cvdso_call __c_kernel_getrandom +V_FUNCTION_END(__kernel_getrandom)
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 0c4ecc8fec5a..79c967212444 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -32,13 +32,12 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif - get_datapage r5 .ifeq \call_time - addi r5, r5, VDSO_DATA_OFFSET + get_datapage r5 vdso_u_time_data .else - addi r4, r5, VDSO_DATA_OFFSET + get_datapage r4 vdso_u_time_data .endif - bl DOTSYM(\funct) + bl CFUNC(DOTSYM(\funct)) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
@@ -114,16 +113,3 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) - -/* Routines for restoring integer registers, called by the compiler. 
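The tail of the cvdso_call macro above implements the PowerPC syscall error convention in miniature: a non-negative result returns with CR0.SO clear, while a negative errno is negated and returned with SO set, which libc converts back into errno. A plain C model of that convention, with a struct standing in for the register-plus-flag result:

#include <stdio.h>

struct vdso_ret {
	long val;
	int so;		/* stand-in for the CR0.SO bit */
};

static struct vdso_ret cvdso_wrap(long raw)
{
	struct vdso_ret r = { raw, 0 };

	if (raw < 0) {		/* crset so; neg r3,r3 */
		r.so = 1;
		r.val = -raw;
	}
	return r;		/* crclr so on the success path */
}

int main(void)
{
	struct vdso_ret ok = cvdso_wrap(16), err = cvdso_wrap(-22);

	printf("ok: val=%ld so=%d, err: val=%ld so=%d\n",
	       ok.val, ok.so, err.val, err.so);
	return 0;
}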
*/ -/* Called with r11 pointing to the stack header word of the caller of the */ -/* function, just beyond the end of the integer restore area. */ -#ifndef __powerpc64__ -_GLOBAL(_restgpr_31_x) -_GLOBAL(_rest32gpr_31_x) - lwz r0,4(r11) - lwz r31,-4(r11) - mtlr r0 - mr r1,r11 - blr -#endif diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index e0d19d74455f..72a1012b8a20 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -6,6 +6,7 @@ #include <asm/vdso.h> #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -74,19 +76,22 @@ SECTIONS .got : { *(.got) } :text .plt : { *(.plt) } + .rela.dyn : { *(.rela .rela*) } + _end = .; __end = .; PROVIDE(end = .); - STABS_DEBUG DWARF_DEBUG ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) + *(*.EMB.apuinfo) + *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.got1) + *(.got1 .glink .iplt) } } @@ -127,6 +132,7 @@ VERSION #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) __kernel_getcpu; #endif + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index 1a4a7bc4c815..32102a05eaa7 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -6,6 +6,7 @@ #include <asm/vdso.h> #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common64) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -32,7 +34,7 @@ SECTIONS . 
= ALIGN(16); .text : { *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*) - *(.sfpr .glink) + *(.sfpr) } :text PROVIDE(__etext = .); PROVIDE(_etext = .); @@ -69,23 +71,24 @@ SECTIONS .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text .gcc_except_table : { *(.gcc_except_table) } - .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .rela.dyn ALIGN(8) : { *(.rela .rela*) } .got ALIGN(8) : { *(.got .toc) } _end = .; PROVIDE(end = .); - STABS_DEBUG DWARF_DEBUG ELF_DETAILS /DISCARD/ : { *(.note.GNU-stack) + *(*.EMB.apuinfo) *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) *(.opd) + *(.glink .iplt .plt) } } @@ -122,6 +125,7 @@ VERSION __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..7f9061a9e8b4 --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ + +#include <linux/linkage.h> + +#include <asm/ppc_asm.h> + +#define dst_bytes r3 +#define key r4 +#define counter r5 +#define nblocks r6 + +#define idx_r0 r0 +#define val4 r4 + +#define const0 0x61707865 +#define const1 0x3320646e +#define const2 0x79622d32 +#define const3 0x6b206574 + +#define key0 r5 +#define key1 r6 +#define key2 r7 +#define key3 r8 +#define key4 r9 +#define key5 r10 +#define key6 r11 +#define key7 r12 + +#define counter0 r14 +#define counter1 r15 + +#define state0 r16 +#define state1 r17 +#define state2 r18 +#define state3 r19 +#define state4 r20 +#define state5 r21 +#define state6 r22 +#define state7 r23 +#define state8 r24 +#define state9 r25 +#define state10 r26 +#define state11 r27 +#define state12 r28 +#define state13 r29 +#define state14 r30 +#define state15 r31 + +.macro quarterround4 a1 b1 c1 d1 a2 b2 c2 d2 a3 b3 c3 d3 a4 b4 c4 d4 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 16 + rotlwi \d2, \d2, 16 + rotlwi \d3, \d3, 16 + rotlwi \d4, \d4, 16 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 12 + rotlwi \b2, \b2, 12 + rotlwi \b3, \b3, 12 + rotlwi \b4, \b4, 12 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 8 + rotlwi \d2, \d2, 8 + rotlwi \d3, \d3, 8 + rotlwi \d4, \d4, 8 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 7 + rotlwi \b2, \b2, 7 + rotlwi \b3, \b3, 7 + rotlwi \b4, \b4, 7 +.endm + +#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4) \ + quarterround4 state##a1 state##b1 state##c1 state##d1 \ + state##a2 state##b2 state##c2 state##d2 \ + state##a3 state##b3 state##c3 state##d3 \ + state##a4 state##b4 state##c4 state##d4 + +/* + * Very basic 32 bits implementation of ChaCha20. Produces a given positive number + * of blocks of output with a nonce of 0, taking an input key and 8-byte + * counter. Importantly does not spill to the stack. 
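For comparison with the quarterround4 macro above, here is the textbook ChaCha20 quarter round in C (add, xor, rotate by 16/12/8/7); the assembly simply interleaves four of these to keep the pipeline busy. The main() checks one value from the RFC 7539 section 2.1.1 test vector:

#include <stdint.h>

static uint32_t rotl32(uint32_t x, int n)
{
	return (x << n) | (x >> (32 - n));
}

static void quarter_round(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
{
	*a += *b; *d ^= *a; *d = rotl32(*d, 16);
	*c += *d; *b ^= *c; *b = rotl32(*b, 12);
	*a += *b; *d ^= *a; *d = rotl32(*d, 8);
	*c += *d; *b ^= *c; *b = rotl32(*b, 7);
}

int main(void)
{
	uint32_t a = 0x11111111, b = 0x01020304, c = 0x9b8d6f43, d = 0x01234567;

	quarter_round(&a, &b, &c, &d);
	return a == 0xea2a92f4 ? 0 : 1;	/* expected per RFC 7539 2.1.1 */
}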
Its arguments are: + * + * r3: output bytes + * r4: 32-byte key input + * r5: 8-byte counter input/output (saved on stack) + * r6: number of 64-byte blocks to write to output + * + * r0: counter of blocks (initialised with r6) + * r4: Value '4' after key has been read. + * r5-r12: key + * r14-r15: counter + * r16-r31: state + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) +#ifdef __powerpc64__ + std counter, -216(r1) + + std r14, -144(r1) + std r15, -136(r1) + std r16, -128(r1) + std r17, -120(r1) + std r18, -112(r1) + std r19, -104(r1) + std r20, -96(r1) + std r21, -88(r1) + std r22, -80(r1) + std r23, -72(r1) + std r24, -64(r1) + std r25, -56(r1) + std r26, -48(r1) + std r27, -40(r1) + std r28, -32(r1) + std r29, -24(r1) + std r30, -16(r1) + std r31, -8(r1) +#else + stwu r1, -96(r1) + stw counter, 20(r1) +#ifdef __BIG_ENDIAN__ + stmw r14, 24(r1) +#else + stw r14, 24(r1) + stw r15, 28(r1) + stw r16, 32(r1) + stw r17, 36(r1) + stw r18, 40(r1) + stw r19, 44(r1) + stw r20, 48(r1) + stw r21, 52(r1) + stw r22, 56(r1) + stw r23, 60(r1) + stw r24, 64(r1) + stw r25, 68(r1) + stw r26, 72(r1) + stw r27, 76(r1) + stw r28, 80(r1) + stw r29, 84(r1) + stw r30, 88(r1) + stw r31, 92(r1) +#endif +#endif /* __powerpc64__ */ + + lwz counter0, 0(counter) + lwz counter1, 4(counter) +#ifdef __powerpc64__ + rldimi counter0, counter1, 32, 0 +#endif + mr idx_r0, nblocks + subi dst_bytes, dst_bytes, 4 + + lwz key0, 0(key) + lwz key1, 4(key) + lwz key2, 8(key) + lwz key3, 12(key) + lwz key4, 16(key) + lwz key5, 20(key) + lwz key6, 24(key) + lwz key7, 28(key) + + li val4, 4 +.Lblock: + li r31, 10 + + lis state0, const0@ha + lis state1, const1@ha + lis state2, const2@ha + lis state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + mtctr r31 + + mr state4, key0 + mr state5, key1 + mr state6, key2 + mr state7, key3 + mr state8, key4 + mr state9, key5 + mr state10, key6 + mr state11, key7 + + mr state12, counter0 + mr state13, counter1 + + li state14, 0 + li state15, 0 + +.Lpermute: + QUARTERROUND4( 0, 4, 8,12, 1, 5, 9,13, 2, 6,10,14, 3, 7,11,15) + QUARTERROUND4( 0, 5,10,15, 1, 6,11,12, 2, 7, 8,13, 3, 4, 9,14) + + bdnz .Lpermute + + addis state0, state0, const0@ha + addis state1, state1, const1@ha + addis state2, state2, const2@ha + addis state3, state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + add state4, state4, key0 + add state5, state5, key1 + add state6, state6, key2 + add state7, state7, key3 + add state8, state8, key4 + add state9, state9, key5 + add state10, state10, key6 + add state11, state11, key7 + + add state12, state12, counter0 + add state13, state13, counter1 + +#ifdef __BIG_ENDIAN__ + stwbrx state0, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state1, 0, dst_bytes + stwbrx state2, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state3, 0, dst_bytes + stwbrx state4, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state5, 0, dst_bytes + stwbrx state6, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state7, 0, dst_bytes + stwbrx state8, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state9, 0, dst_bytes + stwbrx state10, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state11, 0, dst_bytes + stwbrx state12, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state13, 0, dst_bytes + stwbrx state14, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx 
state15, 0, dst_bytes +#else + stw state0, 4(dst_bytes) + stw state1, 8(dst_bytes) + stw state2, 12(dst_bytes) + stw state3, 16(dst_bytes) + stw state4, 20(dst_bytes) + stw state5, 24(dst_bytes) + stw state6, 28(dst_bytes) + stw state7, 32(dst_bytes) + stw state8, 36(dst_bytes) + stw state9, 40(dst_bytes) + stw state10, 44(dst_bytes) + stw state11, 48(dst_bytes) + stw state12, 52(dst_bytes) + stw state13, 56(dst_bytes) + stw state14, 60(dst_bytes) + stwu state15, 64(dst_bytes) +#endif + + subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */ + +#ifdef __powerpc64__ + addi counter0, counter0, 1 + srdi counter1, counter0, 32 +#else + addic counter0, counter0, 1 + addze counter1, counter1 +#endif + + bne .Lblock + +#ifdef __powerpc64__ + ld counter, -216(r1) +#else + lwz counter, 20(r1) +#endif + stw counter0, 0(counter) + stw counter1, 4(counter) + + li r6, 0 + li r7, 0 + li r8, 0 + li r9, 0 + li r10, 0 + li r11, 0 + li r12, 0 + +#ifdef __powerpc64__ + ld r14, -144(r1) + ld r15, -136(r1) + ld r16, -128(r1) + ld r17, -120(r1) + ld r18, -112(r1) + ld r19, -104(r1) + ld r20, -96(r1) + ld r21, -88(r1) + ld r22, -80(r1) + ld r23, -72(r1) + ld r24, -64(r1) + ld r25, -56(r1) + ld r26, -48(r1) + ld r27, -40(r1) + ld r28, -32(r1) + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) +#else +#ifdef __BIG_ENDIAN__ + lmw r14, 24(r1) +#else + lwz r14, 24(r1) + lwz r15, 28(r1) + lwz r16, 32(r1) + lwz r17, 36(r1) + lwz r18, 40(r1) + lwz r19, 44(r1) + lwz r20, 48(r1) + lwz r21, 52(r1) + lwz r22, 56(r1) + lwz r23, 60(r1) + lwz r24, 64(r1) + lwz r25, 68(r1) + lwz r26, 72(r1) + lwz r27, 76(r1) + lwz r28, 80(r1) + lwz r29, 84(r1) + lwz r30, 88(r1) + lwz r31, 92(r1) +#endif + addi r1, r1, 96 +#endif /* __powerpc64__ */ + blr +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..cc79b960a541 --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementation of getrandom() + * + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ +#include <linux/time.h> +#include <linux/types.h> + +ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, + size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/powerpc/kernel/vdso/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c index 55a287c9a736..6f5167d81af5 100644 --- a/arch/powerpc/kernel/vdso/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso/vgettimeofday.c @@ -7,43 +7,43 @@ #ifdef __powerpc64__ int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_data(vd, clock_id, res); } #else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime32_data(vd, clock, ts); } int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, 
struct old_timespec32 *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_time32_data(vd, clock_id, res); } #endif int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_gettimeofday_data(vd, tv, tz); } -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_time_data *vd) { return __cvdso_time_data(vd, time); } diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 10f92f265d51..20bca3548b44 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 839d1a61411d..1912936fa227 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso64_start, vdso64_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 5cc24d8cce94..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> +#include <linux/linkage.h> #include <asm/processor.h> #include <asm/ppc_asm.h> #include <asm/reg.h> @@ -7,7 +9,6 @@ #include <asm/thread_info.h> #include <asm/page.h> #include <asm/ptrace.h> -#include <asm/export.h> #include <asm/asm-compat.h> /* @@ -32,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -108,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX @@ -155,8 +158,8 @@ _GLOBAL(load_up_vsx) * usage of floating-point registers. These routines must be called * with preempt disabled. */ -#ifdef CONFIG_PPC32 .data +#ifdef CONFIG_PPC32 fpzero: .long 0 fpone: @@ -169,24 +172,29 @@ fphalf: lfs fr,name@l(r11) #else - .section ".toc","aw" fpzero: - .tc FD_0_0[TC],0 + .quad 0 fpone: - .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */ + .quad 0x3ff0000000000000 /* 1.0 */ fphalf: - .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */ + .quad 0x3fe0000000000000 /* 0.5 */ -#define LDCONST(fr, name) \ - lfd fr,name@toc(r2) +#ifdef CONFIG_PPC_KERNEL_PCREL +#define LDCONST(fr, name) \ + pla r11,name@pcrel; \ + lfd fr,0(r11) +#else +#define LDCONST(fr, name) \ + addis r11,r2,name@toc@ha; \ + lfd fr,name@toc@l(r11) +#endif #endif - .text /* * Internal routine to enable floating point and set FPSCR to 0. * Don't call it from C; it doesn't use the normal calling convention. 
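From userspace none of this plumbing is visible: libc's clock_gettime() fast path resolves to __kernel_clock_gettime in the vDSO, so the __c_kernel_* helpers above run without a kernel entry. A plain portable caller, for reference:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* on powerpc this typically dispatches into the vDSO, no syscall */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
		return 1;

	printf("monotonic: %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}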
*/ -fpenable: +SYM_FUNC_START_LOCAL(fpenable) #ifdef CONFIG_PPC32 stwu r1,-64(r1) #else @@ -203,6 +211,7 @@ fpenable: mffs fr31 MTFSF_L(fr1) blr +SYM_FUNC_END(fpenable) fpdisable: mtlr r12 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index fe22d940412f..de6ee7d35cff 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,13 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_PPC64 -#define PROVIDE32(x) PROVIDE(__unused__##x) -#else -#define PROVIDE32(x) PROVIDE(x) -#endif - #define BSS_FIRST_SECTIONS *(.bss.prominit) #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 +#define RUNTIME_DISCARD_EXIT #define SOFT_MASK_TABLE(align) \ . = ALIGN(align); \ @@ -32,6 +27,10 @@ #define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT) +#if STRICT_ALIGN_SIZE < PAGE_SIZE +#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT" +#endif + ENTRY(_stext) PHDRS { @@ -67,7 +66,7 @@ SECTIONS .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { #ifdef CONFIG_PPC64 KEEP(*(.head.text.first_256B)); -#ifdef CONFIG_PPC_BOOK3E +#ifdef CONFIG_PPC_BOOK3E_64 #else KEEP(*(.head.text.real_vectors)); *(.head.text.real_trampolines); @@ -102,12 +101,9 @@ SECTIONS #endif /* careful! __ftr_alt_* sections need to be close to .text */ *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); -#ifdef CONFIG_PPC64 *(.tramp.ftrace.text); -#endif NOINSTR_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT @@ -120,34 +116,72 @@ SECTIONS * included with the main text sections, so put it by itself. */ *(.sfpr); - MEM_KEEP(init.text) - MEM_KEEP(exit.text) + *(.text.asan.* .text.tsan.*) + } :text + + . = ALIGN(PAGE_SIZE); + _etext = .; + + /* Read-only data */ + RO_DATA(PAGE_SIZE) + +#ifdef CONFIG_PPC32 + .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) { + *(.sdata2) + } +#endif + + .data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) { + *(.data.rel.ro .data.rel.ro.*) + } + + .branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) { + *(.branch_lt) + } #ifdef CONFIG_PPC32 + .got1 : AT(ADDR(.got1) - LOAD_OFFSET) { *(.got1) + } + .got2 : AT(ADDR(.got2) - LOAD_OFFSET) { __got2_start = .; *(.got2) __got2_end = .; -#endif /* CONFIG_PPC32 */ - - } :text + } + .got : AT(ADDR(.got) - LOAD_OFFSET) { + *(.got) + *(.got.plt) + } + .plt : AT(ADDR(.plt) - LOAD_OFFSET) { + /* XXX: is .plt (and .got.plt) required? */ + *(.plt) + } - . = ALIGN(PAGE_SIZE); - _etext = .; - PROVIDE32 (etext = .); +#else /* CONFIG_PPC32 */ +#ifndef CONFIG_PPC_KERNEL_PCREL + .toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) { + *(.toc1) + } +#endif - /* Read-only data */ - RO_DATA(PAGE_SIZE) + .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { +#ifdef CONFIG_PPC_KERNEL_PCREL + *(.got) +#else + *(.got .toc) +#endif + } -#ifdef CONFIG_PPC64 SOFT_MASK_TABLE(8) RESTART_TABLE(8) +#ifdef CONFIG_PPC64_ELF_ABI_V1 .opd : AT(ADDR(.opd) - LOAD_OFFSET) { __start_opd = .; KEEP(*(.opd)) __end_opd = .; } +#endif . = ALIGN(8); __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { @@ -190,7 +224,7 @@ SECTIONS *(__rfi_flush_fixup) __stop___rfi_flush_fixup = .; } -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC_BARRIER_NOSPEC . = ALIGN(8); @@ -201,7 +235,7 @@ SECTIONS } #endif /* CONFIG_PPC_BARRIER_NOSPEC */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#ifdef CONFIG_PPC_E500 . 
= ALIGN(8); __spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) { __start__btb_flush_fixup = .; @@ -210,32 +244,36 @@ SECTIONS } #endif + /* + * Various code relies on __init_begin being at the strict RWX boundary. + */ + . = ALIGN(STRICT_ALIGN_SIZE); + __srwx_boundary = .; + __end_rodata = .; + __init_begin = .; + /* * Init sections discarded at runtime */ - . = ALIGN(STRICT_ALIGN_SIZE); - __init_begin = .; - . = ALIGN(PAGE_SIZE); .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT - + *(.tramp.ftrace.init); /* *.init.text might be RO so we must ensure this section ends on * a page boundary. */ . = ALIGN(PAGE_SIZE); _einittext = .; -#ifdef CONFIG_PPC64 - *(.tramp.ftrace.init); -#endif } :text /* .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table */ .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + __exittext_begin = .; EXIT_TEXT + __exittext_end = .; } . = ALIGN(PAGE_SIZE); @@ -317,34 +355,13 @@ SECTIONS . = ALIGN(PAGE_SIZE); _sdata = .; -#ifdef CONFIG_PPC32 .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA *(.data.rel*) +#ifdef CONFIG_PPC32 *(SDATA_MAIN) - *(.sdata2) - *(.got.plt) *(.got) - *(.plt) - *(.branch_lt) - } -#else - .data : AT(ADDR(.data) - LOAD_OFFSET) { - DATA_DATA - *(.data.rel*) - *(.toc1) - *(.branch_lt) - } - - .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { - *(.got) -#ifndef CONFIG_RELOCATABLE - __prom_init_toc_start = .; - arch/powerpc/kernel/prom_init.o*(.toc) - __prom_init_toc_end = .; #endif - *(.toc) } -#endif /* The initial task and kernel stack */ INIT_TASK_DATA_SECTION(THREAD_ALIGN) @@ -370,7 +387,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); _edata = .; - PROVIDE32 (edata = .); /* * And finally the bss @@ -380,18 +396,19 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; - PROVIDE32 (end = .); - STABS_DEBUG DWARF_DEBUG ELF_DETAILS DISCARDS /DISCARD/ : { *(*.EMB.apuinfo) - *(.glink .iplt .plt .rela* .comment) + *(.glink .iplt .plt) *(.gnu.version*) *(.gnu.attributes) *(.eh_frame) +#ifndef CONFIG_RELOCATABLE + *(.rela*) +#endif } } diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 7d28b9553654..2429cb1c7baa 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -91,6 +91,10 @@ static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; static u64 wd_smp_last_reset_tb; +#ifdef CONFIG_PPC_PSERIES +static u64 wd_timeout_pct; +#endif + /* * Try to take the exclusive watchdog action / NMI IPI / printing lock. * wd_smp_lock must be held. If this fails, we should return and wait @@ -241,7 +245,7 @@ static void watchdog_smp_panic(int cpu) __cpumask_clear_cpu(c, &wd_smp_cpus_ipi); } } else { - trigger_allbutself_cpu_backtrace(); + trigger_allbutcpu_cpu_backtrace(cpu); cpumask_clear(&wd_smp_cpus_ipi); } @@ -353,7 +357,7 @@ static void watchdog_timer_interrupt(int cpu) if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { /* * Something has called printk from NMI context. It might be - * stuck, so this this triggers a flush that will get that + * stuck, so this triggers a flush that will get that * printk output to the console. * * See wd_lockup_ipi. 
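The __wd_nmi_output test above, if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)), is a one-shot claim: of all CPUs that see the flag set, exactly one wins the exchange and performs the printk flush. A sketch of the pattern using GCC's builtin atomics in place of the kernel's xchg():

#include <stdio.h>

static int wd_nmi_output;

static int claim(int *flag)
{
	/* cheap read first, then the atomic claim, as in the original */
	return *flag && __atomic_exchange_n(flag, 0, __ATOMIC_SEQ_CST);
}

int main(void)
{
	wd_nmi_output = 1;
	printf("first caller flushes: %d\n", claim(&wd_nmi_output));
	printf("second caller skips:  %d\n", claim(&wd_nmi_output));
	return 0;
}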
@@ -412,7 +416,7 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi if (sysctl_hardlockup_all_cpu_backtrace) - trigger_allbutself_cpu_backtrace(); + trigger_allbutcpu_cpu_backtrace(cpu); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP");
@@ -434,7 +438,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return HRTIMER_NORESTART; if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
@@ -475,7 +479,7 @@ static void start_watchdog(void *arg) return; } - if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)) return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
@@ -491,8 +495,7 @@ static void start_watchdog(void *arg) *this_cpu_ptr(&wd_timer_tb) = get_tb(); - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = watchdog_timer_fn; + hrtimer_setup(hrtimer, watchdog_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), HRTIMER_MODE_REL_PINNED); }
@@ -527,7 +530,13 @@ static int stop_watchdog_on_cpu(unsigned int cpu) static void watchdog_calc_timeouts(void) { - wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq; + u64 threshold = watchdog_thresh; + +#ifdef CONFIG_PPC_PSERIES + threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100; +#endif + + wd_panic_timeout_tb = threshold * ppc_tb_freq; /* Have the SMP detector trigger a bit later */ wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
@@ -536,7 +545,7 @@ static void watchdog_calc_timeouts(void) wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; } -void watchdog_nmi_stop(void) +void watchdog_hardlockup_stop(void) { int cpu;
@@ -544,7 +553,7 @@ void watchdog_nmi_stop(void) stop_watchdog_on_cpu(cpu); } -void watchdog_nmi_start(void) +void watchdog_hardlockup_start(void) { int cpu;
@@ -556,7 +565,7 @@ void watchdog_nmi_start(void) /* * Invoked from core watchdog init. */ -int __init watchdog_nmi_probe(void) +int __init watchdog_hardlockup_probe(void) { int err;
@@ -570,3 +579,12 @@ int __init watchdog_nmi_probe(void) } return 0; } + +#ifdef CONFIG_PPC_PSERIES +void watchdog_hardlockup_set_timeout_pct(u64 pct) +{ + pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct); + WRITE_ONCE(wd_timeout_pct, pct); + lockup_detector_reconfigure(); +} +#endif
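The new pseries hook stretches the hard lockup threshold by a percentage before it is converted to timebase ticks in watchdog_calc_timeouts(). A worked sketch of that arithmetic with illustrative numbers (the timebase frequency below is an assumption, not from the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t watchdog_thresh = 10;		/* seconds */
	uint64_t wd_timeout_pct = 25;		/* as set via the pseries hook */
	uint64_t ppc_tb_freq = 512000000;	/* assumed 512 MHz timebase */

	uint64_t threshold = watchdog_thresh;
	threshold += (wd_timeout_pct * threshold) / 100;	/* 10s -> 12s */

	uint64_t wd_panic_timeout_tb = threshold * ppc_tb_freq;
	uint64_t wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;

	printf("panic after %llu tb ticks, SMP detector after %llu\n",
	       (unsigned long long)wd_panic_timeout_tb,
	       (unsigned long long)wd_smp_panic_timeout_tb);
	return 0;
}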