108 files changed, 2864 insertions, 694 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..fddc4478d4d8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -142,6 +142,7 @@ config PPC select ARCH_HAS_PMEM_API if PPC64 select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64 select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -149,6 +150,7 @@ config PPC select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT select ARCH_USE_BUILTIN_BSWAP @@ -166,6 +168,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD @@ -179,8 +182,6 @@ config PPC select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) - select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select HAVE_CBPF_JIT if !PPC64 select HAVE_CONTEXT_TRACKING if PPC64 select HAVE_DEBUG_KMEMLEAK @@ -867,6 +868,21 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +config PPC_MEM_KEYS + prompt "PowerPC Memory Protection Keys" + def_bool y + depends on PPC_BOOK3S_64 + select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_HAS_PKEYS + help + Memory Protection Keys provides a mechanism for enforcing + page-based protections, but without requiring modification of the + page tables when an application changes protection domains. + + For details, see Documentation/vm/protection-keys.txt + + If unsure, say y. 
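(As an aside on the PPC_MEM_KEYS help text above: a minimal userspace sketch of the model it describes, using the pkey_alloc/pkey_mprotect/pkey_free syscalls wired up later in this diff. Illustrative only, not part of the patch, and it assumes the glibc wrappers exposed by <sys/mman.h> under _GNU_SOURCE.)

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <stdio.h>

    int main(void)
    {
            size_t len = 4096;
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            /* allocate a key whose access rights forbid writes */
            int pkey = pkey_alloc(0, PKEY_DISABLE_WRITE);

            if (p == MAP_FAILED || pkey < 0) {
                    perror("pkey setup");
                    return 1;
            }
            /* tag the page with the key; later rights changes update the
               AMR only, with no page table modification */
            if (pkey_mprotect(p, len, PROT_READ | PROT_WRITE, pkey))
                    perror("pkey_mprotect");
            /* a store through p would now take a key fault */
            pkey_free(pkey);
            return 0;
    }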
+ endmenu config ISA_DMA_API diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 5d18169fff58..c45424c64e19 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -90,6 +90,10 @@ config MSI_BITMAP_SELFTEST depends on DEBUG_KERNEL default n +config PPC_IRQ_SOFT_MASK_DEBUG + bool "Include extra checks for powerpc irq soft masking" + default n + config XMON bool "Include xmon kernel debugger" depends on DEBUG_KERNEL diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index e91e115a816f..50ed64fba4ae 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -90,6 +90,8 @@ #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) +#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000) +#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) #define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) @@ -104,6 +106,9 @@ #define HPTE_R_C ASM_CONST(0x0000000000000080) #define HPTE_R_R ASM_CONST(0x0000000000000100) #define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00) +#define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800) +#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400) +#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200) #define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI) #define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index c9448e19847a..0abeb0e2d616 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -108,6 +108,16 @@ typedef struct { #ifdef CONFIG_SPAPR_TCE_IOMMU struct list_head iommu_group_mem_list; #endif + +#ifdef CONFIG_PPC_MEM_KEYS + /* + * Each bit represents one protection key. + * bit set -> key allocated + * bit unset -> key available for allocation + */ + u32 pkey_allocation_map; + s16 execute_only_pkey; /* key holding execute-only protection */ +#endif } mm_context_t; /* diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 60a0d7fd82bc..1c495068bcfa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -40,6 +40,7 @@ #define _RPAGE_RSV2 0x0800000000000000UL #define _RPAGE_RSV3 0x0400000000000000UL #define _RPAGE_RSV4 0x0200000000000000UL +#define _RPAGE_RSV5 0x00040UL #define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */ @@ -59,6 +60,25 @@ /* Max physical address bit as per radix table */ #define _RPAGE_PA_MAX 57 +#ifdef CONFIG_PPC_MEM_KEYS +#ifdef CONFIG_PPC_64K_PAGES +#define H_PTE_PKEY_BIT0 _RPAGE_RSV1 +#define H_PTE_PKEY_BIT1 _RPAGE_RSV2 +#else /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */ +#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */ +#endif /* CONFIG_PPC_64K_PAGES */ +#define H_PTE_PKEY_BIT2 _RPAGE_RSV3 +#define H_PTE_PKEY_BIT3 _RPAGE_RSV4 +#define H_PTE_PKEY_BIT4 _RPAGE_RSV5 +#else /* CONFIG_PPC_MEM_KEYS */ +#define H_PTE_PKEY_BIT0 0 +#define H_PTE_PKEY_BIT1 0 +#define H_PTE_PKEY_BIT2 0 +#define H_PTE_PKEY_BIT3 0 +#define H_PTE_PKEY_BIT4 0 +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * Max physical address bit we will use for now. 
* @@ -122,13 +142,16 @@ #define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \ _PAGE_SOFT_DIRTY) + +#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \ + H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4) /* * Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ - _PAGE_SOFT_DIRTY) + _PAGE_SOFT_DIRTY | H_PTE_PKEY) /* * We define 2 sets of base prot bits, one for basic pages (ie, * cacheable kernel and user pages) and one for non cacheable @@ -548,6 +571,15 @@ static inline int pte_present(pte_t pte) return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT)); } +#ifdef CONFIG_PPC_MEM_KEYS +extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute); +#else +static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + return true; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + #define pte_access_permitted pte_access_permitted static inline bool pte_access_permitted(pte_t pte, bool write) { @@ -568,7 +600,8 @@ static inline bool pte_access_permitted(pte_t pte, bool write) if ((pteval & clear_pte_bits) == clear_pte_bits) return false; - return true; + + return arch_pte_access_permitted(pte_val(pte), write, 0); } /* diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 9befb4df235c..0cac17253513 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -14,6 +14,7 @@ enum { TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */ }; +#ifdef CONFIG_PPC_NATIVE static inline void tlbiel_all(void) { /* @@ -29,6 +30,9 @@ static inline void tlbiel_all(void) else hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL); } +#else +static inline void tlbiel_all(void) { BUG(); }; +#endif static inline void tlbiel_all_lpid(bool radix) { diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 3c04249bcf39..97c38472b924 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -133,6 +133,7 @@ struct pt_regs; extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); extern void _exception(int, struct pt_regs *, int, unsigned long); +extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int); extern void die(const char *, struct pt_regs *, long); extern bool die_will_crash(void); diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 2c895e8d07f7..812535f40124 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -31,6 +31,7 @@ unsigned int create_cond_branch(const unsigned int *addr, unsigned long target, int flags); int patch_branch(unsigned int *addr, unsigned long target, int flags); int patch_instruction(unsigned int *addr, unsigned int instr); +int raw_patch_instruction(unsigned int *addr, unsigned int instr); int instr_is_relative_branch(unsigned int instr); int instr_is_relative_link_branch(unsigned int instr); diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 78ca2a721d04..a2c5c95882cf 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -195,7 +195,7 @@ static inline void cpu_feature_keys_init(void) 
{ } #define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000) #define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000) #define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000) -/* Free LONG_ASM_CONST(0x0020000000000000) */ +#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000) #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000) #define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000) #define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000) @@ -442,7 +442,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | \ - CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX) + CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY) #define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\ CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -452,7 +452,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ @@ -464,7 +464,8 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ + CPU_FTR_PKEY) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9 diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b6..555e22d5e07f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e: ori r3,r3,vector_offset@l; \ mtspr SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL \ + rfi + +#define RFI_TO_USER \ + rfi + #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b27205297e1d..176dfb73d42c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,59 @@ */ #define EX_R3 EX_DAR +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. 
+ */ +#define RFI_FLUSH_SLOT \ + RFI_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define RFI_TO_KERNEL \ + rfid + +#define RFI_TO_USER \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define RFI_TO_USER_OR_KERNEL \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define RFI_TO_GUEST \ + RFI_FLUSH_SLOT; \ + rfid; \ + b rfi_flush_fallback + +#define HRFI_TO_KERNEL \ + hrfid + +#define HRFI_TO_USER \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_USER_OR_KERNEL \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_GUEST \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + +#define HRFI_TO_UNKNOWN \ + RFI_FLUSH_SLOT; \ + hrfid; \ + b hrfi_flush_fallback + #ifdef CONFIG_RELOCATABLE #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ @@ -198,18 +251,40 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,area+EX_R10(r13); /* save r10 - r12 */ \ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) -#define __EXCEPTION_PROLOG_1(area, extra, vec) \ +#define __EXCEPTION_PROLOG_1_PRE(area) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ SAVE_CTR(r10, area); \ - mfcr r9; \ - extra(vec); \ + mfcr r9; + +#define __EXCEPTION_PROLOG_1_POST(area) \ std r11,area+EX_R11(r13); \ std r12,area+EX_R12(r13); \ GET_SCRATCH0(r10); \ std r10,area+EX_R13(r13) + +/* + * This version of the EXCEPTION_PROLOG_1 will carry + * addition parameter called "bitmask" to support + * checking of the interrupt maskable level in the SOFTEN_TEST. + * Intended to be used in MASKABLE_EXCPETION_* macros. + */ +#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \ + __EXCEPTION_PROLOG_1_PRE(area); \ + extra(vec, bitmask); \ + __EXCEPTION_PROLOG_1_POST(area); + +/* + * This version of the EXCEPTION_PROLOG_1 is intended + * to be used in STD_EXCEPTION* macros + */ +#define _EXCEPTION_PROLOG_1(area, extra, vec) \ + __EXCEPTION_PROLOG_1_PRE(area); \ + extra(vec); \ + __EXCEPTION_PROLOG_1_POST(area); + #define EXCEPTION_PROLOG_1(area, extra, vec) \ - __EXCEPTION_PROLOG_1(area, extra, vec) + _EXCEPTION_PROLOG_1(area, extra, vec) #define __EXCEPTION_PROLOG_PSERIES_1(label, h) \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ @@ -218,7 +293,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . /* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1(label, h) \ __EXCEPTION_PROLOG_PSERIES_1(label, h) @@ -232,7 +307,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ - h##rfid; \ + h##RFI_TO_KERNEL; \ b . 
/* prevent speculative execution */ #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \ @@ -432,7 +507,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mflr r9; /* Get LR, later save to stack */ \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ std r9,_LINK(r1); \ - lbz r10,PACASOFTIRQEN(r13); \ + lbz r10,PACAIRQSOFTMASK(r13); \ mfspr r11,SPRN_XER; /* save XER in stackframe */ \ std r10,SOFTE(r1); \ std r11,_XER(r1); \ @@ -496,22 +571,23 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL #define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI #define SOFTEN_VALUE_0xea0 PACA_IRQ_EE +#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI -#define __SOFTEN_TEST(h, vec) \ - lbz r10,PACASOFTIRQEN(r13); \ - cmpwi r10,0; \ +#define __SOFTEN_TEST(h, vec, bitmask) \ + lbz r10,PACAIRQSOFTMASK(r13); \ + andi. r10,r10,bitmask; \ li r10,SOFTEN_VALUE_##vec; \ - beq masked_##h##interrupt + bne masked_##h##interrupt -#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec) +#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask) -#define SOFTEN_TEST_PR(vec) \ +#define SOFTEN_TEST_PR(vec, bitmask) \ KVMTEST(EXC_STD, vec); \ - _SOFTEN_TEST(EXC_STD, vec) + _SOFTEN_TEST(EXC_STD, vec, bitmask) -#define SOFTEN_TEST_HV(vec) \ +#define SOFTEN_TEST_HV(vec, bitmask) \ KVMTEST(EXC_HV, vec); \ - _SOFTEN_TEST(EXC_HV, vec) + _SOFTEN_TEST(EXC_HV, vec, bitmask) #define KVMTEST_PR(vec) \ KVMTEST(EXC_STD, vec) @@ -519,53 +595,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define KVMTEST_HV(vec) \ KVMTEST(EXC_HV, vec) -#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec) -#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec) +#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask) +#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask) -#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ +#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_PROLOG_PSERIES_1(label, h); -#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \ - __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) +#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ + __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) -#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \ +#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_PR, bitmask) -#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \ +#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD) -#define MASKABLE_EXCEPTION_HV(loc, vec, label) \ +#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, bitmask) -#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ +#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV) -#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ +#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13); /* save 
r13 */ \ EXCEPTION_PROLOG_0(PACA_EXGEN); \ - __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) -#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \ - __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) +#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\ + __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) -#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \ +#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_STD, SOFTEN_NOTEST_PR) + EXC_STD, SOFTEN_NOTEST_PR, bitmask) + +#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ + EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD); -#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \ +#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \ _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \ - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, bitmask) -#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \ +#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \ + MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\ EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV) /* diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 8f88f771cc55..1e82eb3caabd 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,7 +187,20 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define RFI_FLUSH_FIXUP_SECTION \ +951: \ + .pushsection __rfi_flush_fixup,"a"; \ + .align 2; \ +952: \ + FTR_ENTRY_OFFSET 951b-952b; \ + .popsection; + + #ifndef __ASSEMBLY__ +#include <linux/types.h> + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; + void apply_feature_fixups(void); void setup_feature_keys(void); #endif diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 832df61f30ef..511acfd7ab0d 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -52,6 +52,7 @@ #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000) #define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000) #define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000) +#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000) #ifndef __ASSEMBLY__ @@ -68,7 +69,8 @@ enum { FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO | FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY | FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN | - FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2, + FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 | + FW_FEATURE_DRC_INFO, FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL, FW_FEATURE_POWERNV_ALWAYS = 0, diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 0a663dfc28b5..7e0e93f24cb7 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -268,14 +268,14 @@ name: STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ EXC_VIRT_END(name, start, size); -#define EXC_REAL_MASKABLE(name, start, size) \ +#define EXC_REAL_MASKABLE(name, start, size, bitmask) \ EXC_REAL_BEGIN(name, start, size); \ - MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \ + 
MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\ EXC_REAL_END(name, start, size); -#define EXC_VIRT_MASKABLE(name, start, size, realvec) \ +#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ EXC_VIRT_BEGIN(name, start, size); \ - MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\ EXC_VIRT_END(name, start, size); #define EXC_REAL_HV(name, start, size) \ @@ -304,13 +304,13 @@ name: #define __EXC_REAL_OOL_MASKABLE(name, start, size) \ __EXC_REAL_OOL(name, start, size); -#define __TRAMP_REAL_OOL_MASKABLE(name, vec) \ +#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \ + MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask); \ -#define EXC_REAL_OOL_MASKABLE(name, start, size) \ +#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \ __EXC_REAL_OOL_MASKABLE(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE(name, start); + __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask); #define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \ EXC_REAL_BEGIN(name, start, size); \ @@ -331,13 +331,13 @@ name: #define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ __EXC_REAL_OOL(name, start, size); -#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec) \ +#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \ TRAMP_REAL_BEGIN(tramp_real_##name); \ - MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \ + MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \ -#define EXC_REAL_OOL_MASKABLE_HV(name, start, size) \ +#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \ __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_REAL_OOL_MASKABLE_HV(name, start); + __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask); #define __EXC_VIRT_OOL(name, start, size) \ EXC_VIRT_BEGIN(name, start, size); \ @@ -355,13 +355,13 @@ name: #define __EXC_VIRT_OOL_MASKABLE(name, start, size) \ __EXC_VIRT_OOL(name, start, size); -#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec) \ +#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\ -#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec) \ +#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \ __EXC_VIRT_OOL_MASKABLE(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE(name, realvec); + __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask); #define __EXC_VIRT_OOL_HV(name, start, size) \ __EXC_VIRT_OOL(name, start, size); @@ -377,13 +377,13 @@ name: #define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \ __EXC_VIRT_OOL(name, start, size); -#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec) \ +#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \ TRAMP_VIRT_BEGIN(tramp_virt_##name); \ - MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \ + MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\ -#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec) \ +#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \ __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \ - __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec); + __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask); #define TRAMP_KVM(area, n) \ TRAMP_KVM_BEGIN(do_kvm_##n); \ diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 
85b7a1a21e22..9c14f7b5c46c 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void); static inline void wait_for_subcore_guest_exit(void) { } static inline void wait_for_tb_resync(void) { } #endif + +struct pt_regs; +extern long hmi_handle_debugtrig(struct pt_regs *regs); + #endif /* __ASM_PPC64_HMI_H__ */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a409177be8bd..eca3f9c68907 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -241,6 +241,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -330,6 +331,17 @@ #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 /* >= 0 values are CPU number */ +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 #define PROC_TABLE_DEREG 0x10 @@ -341,6 +353,7 @@ #define PROC_TABLE_GTSE 0x01 #ifndef __ASSEMBLY__ +#include <linux/types.h> /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments @@ -436,6 +449,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 3818fa0164f0..88e5e8f17e98 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -27,6 +27,15 @@ #define PACA_IRQ_DEC 0x08 /* Or FIT */ #define PACA_IRQ_EE_EDGE 0x10 /* BookE only */ #define PACA_IRQ_HMI 0x20 +#define PACA_IRQ_PMI 0x40 + +/* + * flags for paca->irq_soft_mask + */ +#define IRQS_ENABLED 0 +#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */ +#define IRQS_PMI_DISABLED 2 +#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED) #endif /* CONFIG_PPC64 */ @@ -43,46 +52,112 @@ extern void unknown_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include <asm/paca.h> -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long irq_soft_mask_return(void) { unsigned long flags; asm volatile( "lbz %0,%1(13)" : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled))); + : "i" (offsetof(struct paca_struct, irq_soft_mask))); + + return flags; +} + +/* + * The "memory" clobber acts as both a compiler barrier + * for the critical section and as a clobber because + * we changed paca->irq_soft_mask + */ +static inline notrace void irq_soft_mask_set(unsigned long mask) +{ +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + /* + * The irq mask must always include the STD bit if any are set. + * + * and interrupts don't get replayed until the standard + * interrupt (local_irq_disable()) is unmasked. 
+ * + * Other masks must only provide additional masking beyond + * the standard, and they are also not replayed until the + * standard interrupt becomes unmasked. + * + * This could be changed, but it will require partial + * unmasks to be replayed, among other things. For now, take + * the simple approach. + */ + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + + asm volatile( + "stb %0,%1(13)" + : + : "r" (mask), + "i" (offsetof(struct paca_struct, irq_soft_mask)) + : "memory"); +} + +static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask) +{ + unsigned long flags; + +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(mask && !(mask & IRQS_DISABLED)); +#endif + + asm volatile( + "lbz %0,%1(13); stb %2,%1(13)" + : "=&r" (flags) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) + : "memory"); return flags; } -static inline unsigned long arch_local_irq_disable(void) +static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask) { - unsigned long flags, zero; + unsigned long flags, tmp; asm volatile( - "li %1,0; lbz %0,%2(13); stb %1,%2(13)" - : "=r" (flags), "=&r" (zero) - : "i" (offsetof(struct paca_struct, soft_enabled)) + "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)" + : "=&r" (flags), "=r" (tmp) + : "i" (offsetof(struct paca_struct, irq_soft_mask)), + "r" (mask) : "memory"); +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED)); +#endif + return flags; } +static inline unsigned long arch_local_save_flags(void) +{ + return irq_soft_mask_return(); +} + +static inline void arch_local_irq_disable(void) +{ + irq_soft_mask_set(IRQS_DISABLED); +} + extern void arch_local_irq_restore(unsigned long); static inline void arch_local_irq_enable(void) { - arch_local_irq_restore(1); + arch_local_irq_restore(IRQS_ENABLED); } static inline unsigned long arch_local_irq_save(void) { - return arch_local_irq_disable(); + return irq_soft_mask_set_return(IRQS_DISABLED); } static inline bool arch_irqs_disabled_flags(unsigned long flags) { - return flags == 0; + return flags & IRQS_DISABLED; } static inline bool arch_irqs_disabled(void) @@ -90,6 +165,55 @@ static inline bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +#ifdef CONFIG_PPC_BOOK3S +/* + * To support disabling and enabling of irq with PMI, set of + * new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore() + * functions are added. These macros are implemented using generic + * linux local_irq_* code from include/linux/irqflags.h. 
+ */ +#define raw_local_irq_pmu_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = irq_soft_mask_or_return(IRQS_DISABLED | \ + IRQS_PMI_DISABLED); \ + } while(0) + +#define raw_local_irq_pmu_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + arch_local_irq_restore(flags); \ + } while(0) + +#ifdef CONFIG_TRACE_IRQFLAGS +#define powerpc_local_irq_pmu_save(flags) \ + do { \ + raw_local_irq_pmu_save(flags); \ + trace_hardirqs_off(); \ + } while(0) +#define powerpc_local_irq_pmu_restore(flags) \ + do { \ + if (raw_irqs_disabled_flags(flags)) { \ + raw_local_irq_pmu_restore(flags); \ + trace_hardirqs_off(); \ + } else { \ + trace_hardirqs_on(); \ + raw_local_irq_pmu_restore(flags); \ + } \ + } while(0) +#else +#define powerpc_local_irq_pmu_save(flags) \ + do { \ + raw_local_irq_pmu_save(flags); \ + } while(0) +#define powerpc_local_irq_pmu_restore(flags) \ + do { \ + raw_local_irq_pmu_restore(flags); \ + } while (0) +#endif /* CONFIG_TRACE_IRQFLAGS */ + +#endif /* CONFIG_PPC_BOOK3S */ + #ifdef CONFIG_PPC_BOOK3E #define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory") #define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory") @@ -98,14 +222,13 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1) #endif -#define hard_irq_disable() do { \ - u8 _was_enabled; \ - __hard_irq_disable(); \ - _was_enabled = local_paca->soft_enabled; \ - local_paca->soft_enabled = 0; \ - local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ - if (_was_enabled) \ - trace_hardirqs_off(); \ +#define hard_irq_disable() do { \ + unsigned long flags; \ + __hard_irq_disable(); \ + flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \ + local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \ + if (!arch_irqs_disabled_flags(flags)) \ + trace_hardirqs_off(); \ } while(0) static inline bool lazy_irq_pending(void) @@ -127,7 +250,7 @@ static inline void may_hard_irq_enable(void) static inline bool arch_irq_disabled_regs(struct pt_regs *regs) { - return !regs->softe; + return (regs->softe & IRQS_DISABLED); } extern bool prep_irq_for_idle(void); diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index fad0e6ff460f..d76cb11be3e3 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -35,6 +35,13 @@ #define THREAD_IMC_ENABLE 0x8000000000000000ULL /* + * For debugfs interface for imc-mode and imc-command + */ +#define IMC_CNTL_BLK_OFFSET 0x3FC00 +#define IMC_CNTL_BLK_CMD_OFFSET 8 +#define IMC_CNTL_BLK_MODE_OFFSET 32 + +/* * Structure to hold memory address information for imc units. */ struct imc_mem_info { @@ -71,7 +78,7 @@ struct imc_events { struct imc_pmu { struct pmu pmu; struct imc_mem_info *mem_info; - struct imc_events **events; + struct imc_events *events; /* * Attribute groups for the PMU. Slot 0 used for * format attribute, slot 1 used for cpusmask attribute, diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 1aeb5f13b8c4..1a6c1ce17735 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -47,14 +47,14 @@ * be clobbered. */ #define RECONCILE_IRQ_STATE(__rA, __rB) \ - lbz __rA,PACASOFTIRQEN(r13); \ + lbz __rA,PACAIRQSOFTMASK(r13); \ lbz __rB,PACAIRQHAPPENED(r13); \ - cmpwi cr0,__rA,0; \ - li __rA,0; \ + andi. 
__rA,__rA,IRQS_DISABLED; \ + li __rA,IRQS_DISABLED; \ ori __rB,__rB,PACA_IRQ_HARD_DIS; \ stb __rB,PACAIRQHAPPENED(r13); \ - beq 44f; \ - stb __rA,PACASOFTIRQEN(r13); \ + bne 44f; \ + stb __rA,PACAIRQSOFTMASK(r13); \ TRACE_DISABLE_INTS; \ 44: @@ -64,9 +64,9 @@ #define RECONCILE_IRQ_STATE(__rA, __rB) \ lbz __rA,PACAIRQHAPPENED(r13); \ - li __rB,0; \ + li __rB,IRQS_DISABLED; \ ori __rA,__rA,PACA_IRQ_HARD_DIS; \ - stb __rB,PACASOFTIRQEN(r13); \ + stb __rB,PACAIRQSOFTMASK(r13); \ stb __rA,PACAIRQHAPPENED(r13) #endif #endif diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 941c2a3f231b..9db18287b5f4 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void) /* Only need to enable IRQs by hard enabling them after this */ local_paca->irq_happened = 0; - local_paca->soft_enabled = 1; + irq_soft_mask_set(IRQS_ENABLED); #endif } diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index 600a68bd77f5..fdd00939270b 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -2,76 +2,64 @@ #ifndef _ARCH_POWERPC_LOCAL_H #define _ARCH_POWERPC_LOCAL_H +#ifdef CONFIG_PPC_BOOK3S_64 + #include <linux/percpu.h> #include <linux/atomic.h> +#include <linux/irqflags.h> + +#include <asm/hw_irq.h> typedef struct { - atomic_long_t a; + long v; } local_t; -#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) } - -#define local_read(l) atomic_long_read(&(l)->a) -#define local_set(l,i) atomic_long_set(&(l)->a, (i)) +#define LOCAL_INIT(i) { (i) } -#define local_add(i,l) atomic_long_add((i),(&(l)->a)) -#define local_sub(i,l) atomic_long_sub((i),(&(l)->a)) -#define local_inc(l) atomic_long_inc(&(l)->a) -#define local_dec(l) atomic_long_dec(&(l)->a) - -static __inline__ long local_add_return(long a, local_t *l) +static __inline__ long local_read(local_t *l) { - long t; - - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\ - add %0,%1,%0\n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%2 \n\ - bne- 1b" - : "=&r" (t) - : "r" (a), "r" (&(l->a.counter)) - : "cc", "memory"); - - return t; + return READ_ONCE(l->v); } -#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) - -static __inline__ long local_sub_return(long a, local_t *l) +static __inline__ void local_set(local_t *l, long i) { - long t; + WRITE_ONCE(l->v, i); +} - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\ - subf %0,%1,%0\n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%2 \n\ - bne- 1b" - : "=&r" (t) - : "r" (a), "r" (&(l->a.counter)) - : "cc", "memory"); +#define LOCAL_OP(op, c_op) \ +static __inline__ void local_##op(long i, local_t *l) \ +{ \ + unsigned long flags; \ + \ + powerpc_local_irq_pmu_save(flags); \ + l->v c_op i; \ + powerpc_local_irq_pmu_restore(flags); \ +} - return t; +#define LOCAL_OP_RETURN(op, c_op) \ +static __inline__ long local_##op##_return(long a, local_t *l) \ +{ \ + long t; \ + unsigned long flags; \ + \ + powerpc_local_irq_pmu_save(flags); \ + t = (l->v c_op a); \ + powerpc_local_irq_pmu_restore(flags); \ + \ + return t; \ } -static __inline__ long local_inc_return(local_t *l) -{ - long t; +#define LOCAL_OPS(op, c_op) \ + LOCAL_OP(op, c_op) \ + LOCAL_OP_RETURN(op, c_op) - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\ - addic %0,%0,1\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1 \n\ - bne- 1b" - : "=&r" (t) - : "r" (&(l->a.counter)) - : "cc", "xer", "memory"); +LOCAL_OPS(add, 
+=) +LOCAL_OPS(sub, -=) - return t; -} +#define local_add_negative(a, l) (local_add_return((a), (l)) < 0) +#define local_inc_return(l) local_add_return(1LL, l) +#define local_inc(l) local_inc_return(l) /* * local_inc_and_test - increment and test @@ -81,28 +69,39 @@ static __inline__ long local_inc_return(local_t *l) * and returns true if the result is zero, or false for all * other cases. */ -#define local_inc_and_test(l) (local_inc_return(l) == 0) +#define local_inc_and_test(l) (local_inc_return(l) == 0) -static __inline__ long local_dec_return(local_t *l) +#define local_dec_return(l) local_sub_return(1LL, l) +#define local_dec(l) local_dec_return(l) +#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) +#define local_dec_and_test(l) (local_dec_return((l)) == 0) + +static __inline__ long local_cmpxchg(local_t *l, long o, long n) { long t; + unsigned long flags; - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\ - addic %0,%0,-1\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1\n\ - bne- 1b" - : "=&r" (t) - : "r" (&(l->a.counter)) - : "cc", "xer", "memory"); + powerpc_local_irq_pmu_save(flags); + t = l->v; + if (t == o) + l->v = n; + powerpc_local_irq_pmu_restore(flags); return t; } -#define local_cmpxchg(l, o, n) \ - (cmpxchg_local(&((l)->a.counter), (o), (n))) -#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n))) +static __inline__ long local_xchg(local_t *l, long n) +{ + long t; + unsigned long flags; + + powerpc_local_irq_pmu_save(flags); + t = l->v; + l->v = n; + powerpc_local_irq_pmu_restore(flags); + + return t; +} /** * local_add_unless - add unless the number is a given value @@ -115,62 +114,35 @@ static __inline__ long local_dec_return(local_t *l) */ static __inline__ int local_add_unless(local_t *l, long a, long u) { - long t; - - __asm__ __volatile__ ( -"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\ - cmpw 0,%0,%3 \n\ - beq- 2f \n\ - add %0,%2,%0 \n" - PPC405_ERR77(0,%2) - PPC_STLCX "%0,0,%1 \n\ - bne- 1b \n" -" subf %0,%2,%0 \n\ -2:" - : "=&r" (t) - : "r" (&(l->a.counter)), "r" (a), "r" (u) - : "cc", "memory"); - - return t != u; -} - -#define local_inc_not_zero(l) local_add_unless((l), 1, 0) - -#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0) -#define local_dec_and_test(l) (local_dec_return((l)) == 0) - -/* - * Atomically test *l and decrement if it is greater than 0. - * The function returns the old value of *l minus 1. - */ -static __inline__ long local_dec_if_positive(local_t *l) -{ - long t; + unsigned long flags; + int ret = 0; - __asm__ __volatile__( -"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\ - cmpwi %0,1\n\ - addi %0,%0,-1\n\ - blt- 2f\n" - PPC405_ERR77(0,%1) - PPC_STLCX "%0,0,%1\n\ - bne- 1b" - "\n\ -2:" : "=&b" (t) - : "r" (&(l->a.counter)) - : "cc", "memory"); + powerpc_local_irq_pmu_save(flags); + if (l->v != u) { + l->v += a; + ret = 1; + } + powerpc_local_irq_pmu_restore(flags); - return t; + return ret; } +#define local_inc_not_zero(l) local_add_unless((l), 1, 0) + /* Use these for per-cpu local_t variables: on some archs they are * much more efficient than these naive implementations. Note they take * a variable, not an address. 
*/ -#define __local_inc(l) ((l)->a.counter++) -#define __local_dec(l) ((l)->a.counter++) -#define __local_add(i,l) ((l)->a.counter+=(i)) -#define __local_sub(i,l) ((l)->a.counter-=(i)) +#define __local_inc(l) ((l)->v++) +#define __local_dec(l) ((l)->v++) +#define __local_add(i,l) ((l)->v+=(i)) +#define __local_sub(i,l) ((l)->v-=(i)) + +#else /* CONFIG_PPC64 */ + +#include <asm-generic/local.h> + +#endif /* CONFIG_PPC64 */ #endif /* _ARCH_POWERPC_LOCAL_H */ diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4298ceae6db5..ffe7c71e1132 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -76,6 +76,7 @@ struct machdep_calls { void __noreturn (*restart)(char *cmd); void __noreturn (*halt)(void); + void (*panic)(char *str); void (*cpu_die)(void); long (*time_init)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 30922f699341..07e3f54de9e3 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -13,6 +13,7 @@ #include <asm/cputable.h> #include <linux/mm.h> +#include <linux/pkeys.h> #include <asm/cpu_has_feature.h> /* @@ -22,13 +23,23 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) { - return (prot & PROT_SAO) ? VM_SAO : 0; +#ifdef CONFIG_PPC_MEM_KEYS + return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey)); +#else + return ((prot & PROT_SAO) ? VM_SAO : 0); +#endif } #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) { +#ifdef CONFIG_PPC_MEM_KEYS + return (vm_flags & VM_SAO) ? + __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) : + __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags)); +#else return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0); +#endif } #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 6364f5c2cc3e..bb38312cff28 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void) } #endif +#ifdef CONFIG_PPC_MEM_KEYS +extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address); +#else +static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) +{ + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. 
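(A worked example of the pkey bit plumbing that the mman.h hooks above rely on; the helpers themselves are added in the new asm/pkeys.h further down this diff. For pkey 5, binary 00101, pkey_to_vmflag_bits() sets VM_PKEY_BIT0 and VM_PKEY_BIT2. vmflag_to_pte_pkey_bits() maps those to H_PTE_PKEY_BIT4 and H_PTE_PKEY_BIT2; note the reversal, since software key bit 0 is the least significant bit while H_PTE_PKEY_BIT0 is the most significant PTE key bit. pte_to_hpte_pkey_bits() then selects HPTE_R_KEY_BIT4 and HPTE_R_KEY_BIT2 for the hash PTE, and pte_to_pkey_bits() recovers 0x4 | 0x1 = 5.)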
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6177d43f0ce8..cd2bd739c0df 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -186,11 +186,33 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +#ifdef CONFIG_PPC_MEM_KEYS +bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, + bool execute, bool foreign); +#else /* CONFIG_PPC_MEM_KEYS */ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; } + +#define pkey_mm_init(mm) +#define thread_pkey_regs_save(thread) +#define thread_pkey_regs_restore(new_thread, old_thread) +#define thread_pkey_regs_init(thread) + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + return 0; +} + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return 0x0UL; +} + +#endif /* CONFIG_PPC_MEM_KEYS */ + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_MMU_CONTEXT_H */ diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h index 0e23cd4ac8aa..13e6702ec458 100644 --- a/arch/powerpc/include/asm/mpic_timer.h +++ b/arch/powerpc/include/asm/mpic_timer.h @@ -29,17 +29,17 @@ struct mpic_timer { #ifdef CONFIG_MPIC_TIMER struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time); + time64_t time); void mpic_start_timer(struct mpic_timer *handle); void mpic_stop_timer(struct mpic_timer *handle); -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time); +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time); void mpic_free_timer(struct mpic_timer *handle); #else struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time) { return NULL; } + time64_t time) { return NULL; } void mpic_start_timer(struct mpic_timer *handle) { } void mpic_stop_timer(struct mpic_timer *handle) { } -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { } +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { } void mpic_free_timer(struct mpic_timer *handle) { } #endif diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 3892db93b837..1b7fd535393b 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -159,7 +159,7 @@ struct paca_struct { u64 saved_r1; /* r1 save for RTAS calls or PM */ u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ - u8 soft_enabled; /* irq soft-enable flag */ + u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ @@ -232,6 +232,16 @@ struct paca_struct { struct sibling_subcore_state *sibling_subcore_state; #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * rfi fallback flush must be in its own cacheline to prevent + * other paca data leaking into the L1d + */ + u64 exrfi[EX_SIZE] __aligned(0x80); + void *rfi_flush_fallback_area; + u64 l1d_flush_congruence; + u64 l1d_flush_sets; +#endif }; extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h new file mode 100644 index 000000000000..0409c80c32c0 --- /dev/null +++ b/arch/powerpc/include/asm/pkeys.h @@ -0,0 +1,218 @@ +/* 
SPDX-License-Identifier: GPL-2.0+ */ +/* + * PowerPC Memory Protection Keys management + * + * Copyright 2017, Ram Pai, IBM Corporation. + */ + +#ifndef _ASM_POWERPC_KEYS_H +#define _ASM_POWERPC_KEYS_H + +#include <linux/jump_label.h> +#include <asm/firmware.h> + +DECLARE_STATIC_KEY_TRUE(pkey_disabled); +extern int pkeys_total; /* total pkeys as per device tree */ +extern u32 initial_allocation_mask; /* bits set for reserved keys */ + +/* + * Define these here temporarily so we're not dependent on patching linux/mm.h. + * Once it's updated we can drop these. + */ +#ifndef VM_PKEY_BIT0 +# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0 +# define VM_PKEY_BIT0 VM_HIGH_ARCH_0 +# define VM_PKEY_BIT1 VM_HIGH_ARCH_1 +# define VM_PKEY_BIT2 VM_HIGH_ARCH_2 +# define VM_PKEY_BIT3 VM_HIGH_ARCH_3 +# define VM_PKEY_BIT4 VM_HIGH_ARCH_4 +#endif + +#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \ + VM_PKEY_BIT3 | VM_PKEY_BIT4) + +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | \ + PKEY_DISABLE_WRITE | \ + PKEY_DISABLE_EXECUTE) + +static inline u64 pkey_to_vmflag_bits(u16 pkey) +{ + return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS); +} + +static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags) +{ + if (static_branch_likely(&pkey_disabled)) + return 0x0UL; + + return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) | + ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL)); +} + +static inline int vma_pkey(struct vm_area_struct *vma) +{ + if (static_branch_likely(&pkey_disabled)) + return 0; + return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT; +} + +#define arch_max_pkey() pkeys_total + +static inline u64 pte_to_hpte_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL)); +} + +static inline u16 pte_to_pkey_bits(u64 pteflags) +{ + return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) | + ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL)); +} + +#define pkey_alloc_mask(pkey) (0x1 << pkey) + +#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map) + +#define __mm_pkey_allocated(mm, pkey) { \ + mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \ +} + +#define __mm_pkey_free(mm, pkey) { \ + mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey); \ +} + +#define __mm_pkey_is_allocated(mm, pkey) \ + (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey)) + +#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \ + pkey_alloc_mask(pkey)) + +static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) +{ + /* A reserved key is never considered as 'explicitly allocated' */ + return ((pkey < arch_max_pkey()) && + !__mm_pkey_is_reserved(pkey) && + __mm_pkey_is_allocated(mm, pkey)); +} + +extern void __arch_activate_pkey(int pkey); +extern void __arch_deactivate_pkey(int pkey); +/* + * Returns a positive, 5-bit key on success, or -1 on failure. 
+ * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and + * mm_pkey_free(). + */ +static inline int mm_pkey_alloc(struct mm_struct *mm) +{ + /* + * Note: this is the one and only place we make sure that the pkey is + * valid as far as the hardware is concerned. The rest of the kernel + * trusts that only good, valid pkeys come out of here. + */ + u32 all_pkeys_mask = (u32)(~(0x0)); + int ret; + + if (static_branch_likely(&pkey_disabled)) + return -1; + + /* + * Are we out of pkeys? We must handle this specially because ffz() + * behavior is undefined if there are no zeros. + */ + if (mm_pkey_allocation_map(mm) == all_pkeys_mask) + return -1; + + ret = ffz((u32)mm_pkey_allocation_map(mm)); + __mm_pkey_allocated(mm, ret); + + /* + * Enable the key in the hardware + */ + if (ret > 0) + __arch_activate_pkey(ret); + return ret; +} + +static inline int mm_pkey_free(struct mm_struct *mm, int pkey) +{ + if (static_branch_likely(&pkey_disabled)) + return -1; + + if (!mm_pkey_is_allocated(mm, pkey)) + return -EINVAL; + + /* + * Disable the key in the hardware + */ + __arch_deactivate_pkey(pkey); + __mm_pkey_free(mm, pkey); + + return 0; +} + +/* + * Try to dedicate one of the protection keys to be used as an + * execute-only protection key. + */ +extern int __execute_only_pkey(struct mm_struct *mm); +static inline int execute_only_pkey(struct mm_struct *mm) +{ + if (static_branch_likely(&pkey_disabled)) + return -1; + + return __execute_only_pkey(mm); +} + +extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey); +static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma, + int prot, int pkey) +{ + if (static_branch_likely(&pkey_disabled)) + return 0; + + /* + * Is this an mprotect_pkey() call? If so, never override the value that + * came from the user. 
+ */ + if (pkey != -1) + return pkey; + + return __arch_override_mprotect_pkey(vma, prot, pkey); +} + +extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); +static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + if (static_branch_likely(&pkey_disabled)) + return -EINVAL; + return __arch_set_user_pkey_access(tsk, pkey, init_val); +} + +static inline bool arch_pkeys_enabled(void) +{ + return !static_branch_likely(&pkey_disabled); +} + +extern void pkey_mm_init(struct mm_struct *mm); +extern bool arch_supports_pkeys(int cap); +extern unsigned int arch_usable_pkeys(void); +extern void thread_pkey_regs_save(struct thread_struct *thread); +extern void thread_pkey_regs_restore(struct thread_struct *new_thread, + struct thread_struct *old_thread); +extern void thread_pkey_regs_init(struct thread_struct *thread); +#endif /*_ASM_POWERPC_KEYS_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 7f01b22fa6cb..55eddf50d149 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu) return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index bdab3b74eb98..01299cdc9806 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -309,6 +309,11 @@ struct thread_struct { struct thread_vr_state ckvr_state; /* Checkpointed VR state */ unsigned long ckvrsave; /* Checkpointed VRSAVE */ #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#ifdef CONFIG_PPC_MEM_KEYS + unsigned long amr; + unsigned long iamr; + unsigned long uamor; +#endif #ifdef CONFIG_KVM_BOOK3S_32_HANDLER void* kvm_shadow_vcpu; /* KVM internal data */ #endif /* CONFIG_KVM_BOOK3S_32_HANDLER */ diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 9f27866e3126..b04c5ce8191b 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -80,6 +80,21 @@ extern void of_instantiate_rtc(void); extern int of_get_ibm_chip_id(struct device_node *np); +struct of_drc_info { + char *drc_type; + char *drc_name_prefix; + u32 drc_index_start; + u32 drc_name_suffix_start; + u32 num_sequential_elems; + u32 sequential_inc; + u32 drc_power_domain; + u32 last_drc_index; +}; + +extern int of_read_drc_info_cell(struct property **prop, + const __be32 **curval, struct of_drc_info *data); + + /* * There are two methods for telling firmware what our capabilities are. 
* Newer machines have an "ibm,client-architecture-support" method on the @@ -160,6 +175,7 @@ extern int of_get_ibm_chip_id(struct device_node *np); #define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ /* Radix Table Extensions */ #define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ +#define OV5_DRC_INFO 0x1640 /* Redef Prop Structures: drc-info */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b779f3ccd412..e6c7eadf6bce 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -312,7 +312,6 @@ DSISR_BAD_EXT_CTRL) #define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \ DSISR_ATTR_CONFLICT | \ - DSISR_KEYFAULT | \ DSISR_UNSUPP_MMU | \ DSISR_PRTABLE_FAULT | \ DSISR_ICSWX_NO_CT | \ @@ -432,8 +431,9 @@ #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ -#define SPRN_HMER 0x150 /* Hardware m? error recovery */ -#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ +#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ +#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ +#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ #define SPRN_PCR 0x152 /* Processor compatibility register */ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 257d23dbf55d..469b7fdc9be4 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long); void check_for_initrd(void); void initmem_init(void); +void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES @@ -38,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {} static inline void pseries_little_endian_exceptions(void) {} #endif /* CONFIG_PPC_PSERIES */ +void rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { + L1D_FLUSH_NONE = 0x1, + L1D_FLUSH_FALLBACK = 0x2, + L1D_FLUSH_ORI = 0x4, + L1D_FLUSH_MTTRIG = 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 449912f057f6..d61f9c96d916 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -389,3 +389,6 @@ COMPAT_SYS_SPU(preadv2) COMPAT_SYS_SPU(pwritev2) SYSCALL(kexec_file_load) SYSCALL(statx) +SYSCALL(pkey_alloc) +SYSCALL(pkey_free) +SYSCALL(pkey_mprotect) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 9ba11dbcaca9..daf1ba97a00c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,14 +12,10 @@ #include <uapi/asm/unistd.h> -#define NR_syscalls 384 +#define NR_syscalls 387 #define __NR__exit __NR_exit -#define __IGNORE_pkey_mprotect -#define __IGNORE_pkey_alloc -#define __IGNORE_pkey_free - #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 1d3f2be5ae39..fa4288822b68 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h 
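(Tying the setup.h additions above to the H_GET_CPU_CHARACTERISTICS bits earlier in this diff: a sketch of how platform setup code might pick a flush type. The real pseries/powernv setup changes live in other files of this commit; the function name here is hypothetical and the policy shown is only an approximation.)

    static void example_setup_rfi_flush(void)      /* hypothetical */
    {
            struct h_cpu_char_result result;
            enum l1d_flush_type types = L1D_FLUSH_FALLBACK;
            bool enable = true;

            if (plpar_get_cpu_characteristics(&result) == H_SUCCESS) {
                    if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
                            types |= L1D_FLUSH_MTTRIG;
                    if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
                            types |= L1D_FLUSH_ORI;
                    /* flush on kernel exit only if firmware recommends it */
                    enable = !!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR);
            }

            setup_rfi_flush(types, enable);
    }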
@@ -10,6 +10,41 @@ #define _ASM_POWERPC_XIVE_REGS_H /* + * "magic" Event State Buffer (ESB) MMIO offsets. + * + * Each interrupt source has a 2-bit state machine called ESB + * which can be controlled by MMIO. It's made of 2 bits, P and + * Q. P indicates that an interrupt is pending (has been sent + * to a queue and is waiting for an EOI). Q indicates that the + * interrupt has been triggered while pending. + * + * This acts as a coalescing mechanism in order to guarantee + * that a given interrupt only occurs at most once in a queue. + * + * When doing an EOI, the Q bit will indicate if the interrupt + * needs to be re-triggered. + * + * The following offsets into the ESB MMIO allow to read or + * manipulate the PQ bits. They must be used with an 8-bytes + * load instruction. They all return the previous state of the + * interrupt (atomically). + * + * Additionally, some ESB pages support doing an EOI via a + * store at 0 and some ESBs support doing a trigger via a + * separate trigger page. + */ +#define XIVE_ESB_STORE_EOI 0x400 /* Store */ +#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ +#define XIVE_ESB_GET 0x800 /* Load */ +#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ +#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ +#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ +#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ + +#define XIVE_ESB_VAL_P 0x2 +#define XIVE_ESB_VAL_Q 0x1 + +/* * Thread Management (aka "TM") registers */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 371fbebf1ec9..b619a5585cd6 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -58,6 +58,9 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_EOI_FW 0x10 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 +/* Special flag set by KVM for excalation interrupts */ +#define XIVE_IRQ_NO_EOI 0x80 + #define XIVE_INVALID_CHIP_ID -1 /* A queue tracking structure in a CPU */ @@ -72,41 +75,6 @@ struct xive_q { atomic_t pending_count; }; -/* - * "magic" Event State Buffer (ESB) MMIO offsets. - * - * Each interrupt source has a 2-bit state machine called ESB - * which can be controlled by MMIO. It's made of 2 bits, P and - * Q. P indicates that an interrupt is pending (has been sent - * to a queue and is waiting for an EOI). Q indicates that the - * interrupt has been triggered while pending. - * - * This acts as a coalescing mechanism in order to guarantee - * that a given interrupt only occurs at most once in a queue. - * - * When doing an EOI, the Q bit will indicate if the interrupt - * needs to be re-triggered. - * - * The following offsets into the ESB MMIO allow to read or - * manipulate the PQ bits. They must be used with an 8-bytes - * load instruction. They all return the previous state of the - * interrupt (atomically). - * - * Additionally, some ESB pages support doing an EOI via a - * store at 0 and some ESBs support doing a trigger via a - * separate trigger page. 
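Since the comment block above is the only in-tree description of the ESB PQ offsets, here is a minimal sketch (not part of this diff) of how a driver could use them; esb_mmio stands for an already-ioremapped ESB page for one interrupt source, and in_be64() is used because the offsets require 8-byte loads.

/* Read the PQ state, then set PQ=01 (the state commonly used for masking). */
static bool example_esb_mask_source(void __iomem *esb_mmio)
{
	bool was_pending;
	u64 pq;

	/* XIVE_ESB_GET: non-destructive 8-byte load that returns the PQ bits */
	pq = in_be64(esb_mmio + XIVE_ESB_GET);
	was_pending = pq & XIVE_ESB_VAL_P;

	/* XIVE_ESB_SET_PQ_01: atomically set P=0,Q=1; the load returns old PQ */
	(void)in_be64(esb_mmio + XIVE_ESB_SET_PQ_01);

	return was_pending;
}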
- */ -#define XIVE_ESB_STORE_EOI 0x400 /* Store */ -#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ -#define XIVE_ESB_GET 0x800 /* Load */ -#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ -#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ -#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ -#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ - -#define XIVE_ESB_VAL_P 0x2 -#define XIVE_ESB_VAL_Q 0x1 - /* Global enable flags for the XIVE support */ extern bool __xive_enabled; diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h index 5f201d40bcca..860c59291bfc 100644 --- a/arch/powerpc/include/uapi/asm/elf.h +++ b/arch/powerpc/include/uapi/asm/elf.h @@ -97,6 +97,7 @@ #define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */ #define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */ #define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */ +#define ELF_NPKEY 3 /* includes amr, iamr, uamor */ typedef unsigned long elf_greg_t64; typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG]; diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index e63bc37e33af..65065ce32814 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -30,4 +30,10 @@ #define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ +/* Override any generic PKEY permission defines */ +#define PKEY_DISABLE_EXECUTE 0x4 +#undef PKEY_ACCESS_MASK +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE |\ + PKEY_DISABLE_EXECUTE) #endif /* _UAPI_ASM_POWERPC_MMAN_H */ diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index df8684f31919..389c36fd8299 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -395,5 +395,8 @@ #define __NR_pwritev2 381 #define __NR_kexec_file_load 382 #define __NR_statx 383 +#define __NR_pkey_alloc 384 +#define __NR_pkey_free 385 +#define __NR_pkey_mprotect 386 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 3f6316bcde4e..fa5c4125f42a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -178,7 +178,7 @@ int main(void) OFFSET(PACATOC, paca_struct, kernel_toc); OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); - OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled); + OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask); OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened); #ifdef CONFIG_PPC_BOOK3S OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id); @@ -237,6 +237,11 @@ int main(void) OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp); OFFSET(PACA_IN_MCE, paca_struct, in_mce); OFFSET(PACA_IN_NMI, paca_struct, in_nmi); + OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); + OFFSET(PACA_EXRFI, paca_struct, exrfi); + OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); + OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 730ade48329b..3f30c994e931 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -98,6 +98,7 @@ _GLOBAL(__setup_cpu_power9) li r0,0 mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 
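With the syscall-table and uapi hunks above in place (pkey_alloc/pkey_free/pkey_mprotect at 384-386, plus PKEY_DISABLE_EXECUTE), the feature becomes reachable from userspace. A rough, illustrative test program follows; raw syscall() is used in case the C library does not yet provide pkey wrappers, and the fallback defines simply mirror values from this series.

#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_pkey_alloc			/* powerpc numbers from this series */
#define __NR_pkey_alloc		384
#define __NR_pkey_free		385
#define __NR_pkey_mprotect	386
#endif
#ifndef PKEY_DISABLE_WRITE
#define PKEY_DISABLE_WRITE	0x2	/* generic uapi value */
#endif

int main(void)
{
	size_t len = 64 * 1024;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	long pkey;

	if (p == MAP_FAILED)
		return 1;

	/* allocate a key whose default rights forbid writes through it */
	pkey = syscall(__NR_pkey_alloc, 0, PKEY_DISABLE_WRITE);
	if (pkey < 0)
		return 1;	/* no pkey support, or all keys in use */

	/* tag the mapping with the key; stores will now take a key fault */
	syscall(__NR_pkey_mprotect, p, len, PROT_READ | PROT_WRITE, pkey);

	syscall(__NR_pkey_free, pkey);
	munmap(p, len);
	return 0;
}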
mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 @@ -121,6 +122,7 @@ _GLOBAL(__restore_cpu_power9) li r0,0 mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 + mtspr SPRN_PID,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 371c05fe250a..2cb5109a7ea3 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -37,6 +37,11 @@ #include <asm/tm.h> #include <asm/ppc-opcode.h> #include <asm/export.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif /* * System calls. @@ -128,10 +133,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) * of irq tracing is used, we additionally check that condition * is correct */ -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) - lbz r10,PACASOFTIRQEN(r13) - xori r10,r10,1 -1: tdnei r10,0 +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) + lbz r10,PACAIRQSOFTMASK(r13) +1: tdnei r10,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif @@ -147,7 +151,7 @@ system_call: /* label this so stack traces look sane */ /* We do need to set SOFTE in the stack frame or the return * from interrupt will be painful */ - li r10,1 + li r10,IRQS_ENABLED std r10,SOFTE(r1) CURRENT_THREAD_INFO(r11, r1) @@ -262,13 +266,23 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ + ld r2,GPR2(r1) + ld r1,GPR1(r1) + mtlr r4 + mtcr r5 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r8 + RFI_TO_USER + b . /* prevent speculative execution */ + + /* exit to kernel */ 1: ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 mtspr SPRN_SRR0,r7 mtspr SPRN_SRR1,r8 - RFI + RFI_TO_KERNEL b . /* prevent speculative execution */ .Lsyscall_error: @@ -397,8 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtmsrd r10, 1 mtspr SPRN_SRR0, r11 mtspr SPRN_SRR1, r12 - - rfid + RFI_TO_USER b . /* prevent speculative execution */ #endif _ASM_NOKPROBE_SYMBOL(system_call_common); @@ -741,10 +754,10 @@ resume_kernel: beq+ restore /* Check that preempt_count() == 0 and interrupts are enabled */ lwz r8,TI_PREEMPT(r9) - cmpwi cr1,r8,0 + cmpwi cr0,r8,0 + bne restore ld r0,SOFTE(r1) - cmpdi r0,0 - crandc eq,cr1*4+eq,eq + andi. r0,r0,IRQS_DISABLED bne restore /* @@ -782,12 +795,12 @@ restore: * are about to re-enable interrupts */ ld r5,SOFTE(r1) - lbz r6,PACASOFTIRQEN(r13) - cmpwi cr0,r5,0 - beq .Lrestore_irq_off + lbz r6,PACAIRQSOFTMASK(r13) + andi. r5,r5,IRQS_DISABLED + bne .Lrestore_irq_off /* We are enabling, were we already enabled ? Yes, just return */ - cmpwi cr0,r6,1 + andi. r6,r6,IRQS_DISABLED beq cr0,.Ldo_restore /* @@ -806,8 +819,8 @@ restore: */ .Lrestore_no_replay: TRACE_ENABLE_INTS - li r0,1 - stb r0,PACASOFTIRQEN(r13); + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13); /* * Final return path. BookE is handled in a different file @@ -878,7 +891,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) -1: + mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -891,8 +904,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r3,GPR3(r1) ld r4,GPR4(r1) ld r1,GPR1(r1) + RFI_TO_USER + b . 
/* prevent speculative execution */ - rfid +1: mtspr SPRN_SRR1,r3 + + ld r2,_CCR(r1) + mtcrf 0xFF,r2 + ld r2,_NIP(r1) + mtspr SPRN_SRR0,r2 + + ld r0,GPR0(r1) + ld r2,GPR2(r1) + ld r3,GPR3(r1) + ld r4,GPR4(r1) + ld r1,GPR1(r1) + RFI_TO_KERNEL b . /* prevent speculative execution */ #endif /* CONFIG_PPC_BOOK3E */ @@ -912,10 +939,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS stb r7,PACAIRQHAPPENED(r13) 1: -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG) +#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG) /* The interrupt should not have soft enabled. */ - lbz r7,PACASOFTIRQEN(r13) -1: tdnei r7,0 + lbz r7,PACAIRQSOFTMASK(r13) +1: tdeqi r7,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif b .Ldo_restore @@ -955,6 +982,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) addi r3,r1,STACK_FRAME_OVERHEAD; bl do_IRQ b ret_from_except +1: cmpwi cr0,r3,0xf00 + bne 1f + addi r3,r1,STACK_FRAME_OVERHEAD; + bl performance_monitor_exception + b ret_from_except 1: cmpwi cr0,r3,0xe60 bne 1f addi r3,r1,STACK_FRAME_OVERHEAD; @@ -1031,15 +1063,15 @@ _GLOBAL(enter_rtas) li r0,0 mtcr r0 -#ifdef CONFIG_BUG +#ifdef CONFIG_BUG /* There is no way it is acceptable to get here with interrupts enabled, * check it with the asm equivalent of WARN_ON */ - lbz r0,PACASOFTIRQEN(r13) -1: tdnei r0,0 + lbz r0,PACAIRQSOFTMASK(r13) +1: tdeqi r0,IRQS_ENABLED EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING #endif - + /* Hard-disable interrupts */ mfmsr r6 rldicl r7,r6,48,1 @@ -1077,7 +1109,7 @@ __enter_rtas: mtspr SPRN_SRR0,r5 mtspr SPRN_SRR1,r6 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ rtas_return_loc: @@ -1107,7 +1139,7 @@ rtas_return_loc: mtspr SPRN_SRR0,r3 mtspr SPRN_SRR1,r4 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ _ASM_NOKPROBE_SYMBOL(__enter_rtas) _ASM_NOKPROBE_SYMBOL(rtas_return_loc) @@ -1180,7 +1212,7 @@ _GLOBAL(enter_prom) LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE) andc r11,r11,r12 mtsrr1 r11 - rfid + RFI_TO_KERNEL #endif /* CONFIG_PPC_BOOK3E */ 1: /* Return from OF */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index acd8ca76233e..ee832d344a5a 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10,SPRN_ESR SPECIAL_EXC_STORE(r10,ESR) - lbz r10,PACASOFTIRQEN(r13) + lbz r10,PACAIRQSOFTMASK(r13) SPECIAL_EXC_STORE(r10,SOFTE) ld r10,_NIP(r1) SPECIAL_EXC_STORE(r10,CSRR0) @@ -206,17 +206,17 @@ BEGIN_FTR_SECTION mtspr SPRN_MAS8,r10 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) - lbz r6,PACASOFTIRQEN(r13) + lbz r6,PACAIRQSOFTMASK(r13) ld r5,SOFTE(r1) /* Interrupts had better not already be enabled... */ - twnei r6,0 + tweqi r6,IRQS_ENABLED - cmpwi cr0,r5,0 - beq 1f + andi. r6,r5,IRQS_DISABLED + bne 1f TRACE_ENABLE_INTS - stb r5,PACASOFTIRQEN(r13) + stb r5,PACAIRQSOFTMASK(r13) 1: /* * Restore PACAIRQHAPPENED rather than setting it based on @@ -351,9 +351,9 @@ ret_from_mc_except: #define PROLOG_ADDITION_NONE_MC(n) #define PROLOG_ADDITION_MASKABLE_GEN(n) \ - lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \ - cmpwi cr0,r10,0; /* yes -> go out of line */ \ - beq masked_interrupt_book3e_##n + lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \ + andi. 
r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \ + bne masked_interrupt_book3e_##n #define PROLOG_ADDITION_2REGS_GEN(n) \ std r14,PACA_EXGEN+EX_R14(r13); \ @@ -397,7 +397,7 @@ exc_##n##_common: \ mfspr r8,SPRN_XER; /* save XER in stackframe */ \ ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \ lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \ - lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \ + lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \ ld r12,exception_marker@toc(r2); \ li r0,0; \ std r3,GPR10(r1); /* save r10 to stackframe */ \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 175891c6909c..9e6882bdc526 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -256,7 +256,7 @@ BEGIN_FTR_SECTION LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 - rfid + RFI_TO_KERNEL b . /* prevent speculative execution */ 2: /* Stack overflow. Stay on emergency stack and panic. @@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) li r3,MSR_ME andc r10,r10,r3 /* Turn off MSR_ME */ mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 2: /* @@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP - rfid + RFI_TO_USER_OR_KERNEL 9: /* Deliver the machine check to host kernel in V mode. */ MACHINE_CHECK_HANDLER_WINDUP @@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common) stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + andi. r9,r11,MSR_PR // Check for exception from userspace + cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later + /* * Test MSR_RI before calling slb_allocate_realmode, because the * MSR in r11 gets clobbered. However we still want to allocate @@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) /* All done -- return from exception. */ + bne cr4,1f /* returning to kernel */ + .machine push .machine "power4" mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ mtcrf 0x02,r9 /* I/D indication is in cr6 */ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ @@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) ld r11,PACA_EXSLB+EX_R11(r13) ld r12,PACA_EXSLB+EX_R12(r13) ld r13,PACA_EXSLB+EX_R13(r13) - rfid + RFI_TO_USER + b . /* prevent speculative execution */ +1: +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */ + mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */ + mtcrf 0x02,r9 /* I/D indication is in cr6 */ + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + + RESTORE_CTR(r9, PACA_EXSLB) + RESTORE_PPR_PACA(PACA_EXSLB, r9) + mr r3,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + RFI_TO_KERNEL b . /* prevent speculative execution */ + 2: std r3,PACA_EXSLB+EX_DAR(r13) mr r3,r12 mfspr r11,SPRN_SRR0 @@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 8: std r3,PACA_EXSLB+EX_DAR(r13) @@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) mtspr SPRN_SRR1,r10 - rfid + RFI_TO_KERNEL b . 
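One detail worth calling out about the PACASOFTIRQEN -> PACAIRQSOFTMASK conversion running through the entry and exception code above: the sense of the per-CPU field is inverted. The old soft_enabled byte was 1 when interrupts were enabled, while irq_soft_mask is a bitmask that is 0 (IRQS_ENABLED) when nothing is masked, which is why the asm now tests "andi. rN,rN,IRQS_DISABLED" instead of comparing against 1. A small C model of the assumed semantics (the real IRQS_* definitions live in asm/hw_irq.h, which is not part of this excerpt):

/* Assumed values: IRQS_ENABLED must be 0, the others are individual bits. */
#define EX_IRQS_ENABLED		0x00
#define EX_IRQS_DISABLED	0x01	/* "normal" external interrupts masked */
#define EX_IRQS_PMI_DISABLED	0x02	/* perf-monitor interrupts masked too */

static unsigned char ex_irq_soft_mask;	/* stands in for paca->irq_soft_mask */

static void ex_local_irq_disable(void)
{
	/* masking now sets bits instead of clearing a boolean */
	ex_irq_soft_mask |= EX_IRQS_DISABLED;
}

static int ex_irqs_disabled(void)
{
	/* matches the asm idiom: andi. rN,rN,IRQS_DISABLED ; bne ... */
	return ex_irq_soft_mask & EX_IRQS_DISABLED;
}

static void ex_local_irq_enable(void)
{
	ex_irq_soft_mask = EX_IRQS_ENABLED;	/* 0 means nothing is masked */
}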
EXC_COMMON_BEGIN(unrecov_slb) @@ -691,10 +718,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) hardware_interrupt_hv: BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, - EXC_HV, SOFTEN_TEST_HV) + EXC_HV, SOFTEN_TEST_HV, + IRQS_DISABLED) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, - EXC_STD, SOFTEN_TEST_PR) + EXC_STD, SOFTEN_TEST_PR, + IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x100) @@ -702,9 +731,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) .globl hardware_interrupt_relon_hv; hardware_interrupt_relon_hv: BEGIN_FTR_SECTION - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_HV, SOFTEN_TEST_HV, + IRQS_DISABLED) FTR_SECTION_ELSE - _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) + _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, + EXC_STD, SOFTEN_TEST_PR, + IRQS_DISABLED) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) @@ -800,8 +833,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80) -EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900) +EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) +EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -812,8 +845,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980) EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt) -EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100) -EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00) +EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED) +EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0xa00) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception) @@ -901,7 +934,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtspr SPRN_SRR0,r10 ; \ ld r10,PACAKMSR(r13) ; \ mtspr SPRN_SRR1,r10 ; \ - rfid ; \ + RFI_TO_KERNEL ; \ b . ; /* prevent speculative execution */ #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH @@ -917,7 +950,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ xori r12,r12,MSR_LE ; \ mtspr SPRN_SRR1,r12 ; \ mr r13,r9 ; \ - rfid ; /* return to userspace */ \ + RFI_TO_USER ; /* return to userspace */ \ b . ; /* prevent speculative execution */ #else #define SYSCALL_FASTENDIAN_TEST @@ -1025,7 +1058,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt) * mode. 
*/ __EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early) -__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60) +__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED) EXC_VIRT_NONE(0x4e60, 0x20) TRAMP_KVM_HV(PACA_EXGEN, 0xe60) TRAMP_REAL_BEGIN(hmi_exception_early) @@ -1063,7 +1096,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mtcr r11 REST_GPR(11, r1) ld r1,GPR1(r1) - hrfid + HRFI_TO_USER_OR_KERNEL 1: mtcr r11 REST_GPR(11, r1) @@ -1083,8 +1116,8 @@ EXC_COMMON_BEGIN(hmi_exception_common) EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception, ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON) -EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20) -EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80) +EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED) TRAMP_KVM_HV(PACA_EXGEN, 0xe80) #ifdef CONFIG_PPC_DOORBELL EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception) @@ -1093,8 +1126,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception) #endif -EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20) -EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0) +EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED) +EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED) TRAMP_KVM_HV(PACA_EXGEN, 0xea0) EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ) @@ -1105,8 +1138,8 @@ EXC_REAL_NONE(0xee0, 0x20) EXC_VIRT_NONE(0x4ee0, 0x20) -EXC_REAL_OOL(performance_monitor, 0xf00, 0x20) -EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00) +EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED) +EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED) TRAMP_KVM(PACA_EXGEN, 0xf00) EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception) @@ -1314,7 +1347,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r11,PACA_EXGEN+EX_R11(r13) ld r12,PACA_EXGEN+EX_R12(r13) ld r13,PACA_EXGEN+EX_R13(r13) - HRFID + HRFI_TO_UNKNOWN b . #endif @@ -1418,10 +1451,94 @@ masked_##_H##interrupt: \ ld r10,PACA_EXGEN+EX_R10(r13); \ ld r11,PACA_EXGEN+EX_R11(r13); \ /* returns to kernel where r13 must be set up, so don't restore it */ \ - ##_H##rfid; \ + ##_H##RFI_TO_KERNEL; \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). 
+ */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + rfid + +TRAMP_REAL_BEGIN(hrfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + std r12,PACA_EXRFI+EX_R12(r13) + std r8,PACA_EXRFI+EX_R13(r13) + mfctr r9 + ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) + ld r11,PACA_L1D_FLUSH_SETS(r13) + ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ + addi r12,r12,8 + mtctr r11 + DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + + /* order ld/st prior to dcbt stop all streams with flushing */ + sync +1: li r8,0 + .rept 8 /* 8-way set associative */ + ldx r11,r10,r8 + add r8,r8,r12 + xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not + add r8,r8,r11 // Add 0, this creates a dependency on the ldx + .endr + addi r10,r10,128 /* 128 byte cache line */ + bdnz 1b + + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + ld r12,PACA_EXRFI+EX_R12(r13) + ld r8,PACA_EXRFI+EX_R13(r13) + GET_SCRATCH0(r13); + hrfid + /* * Real mode exceptions actually use this too, but alternate * instruction code patches (which end up in the common .text area) @@ -1441,7 +1558,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt) addi r13, r13, 4 mtspr SPRN_SRR0, r13 GET_SCRATCH0(r13) - rfid + RFI_TO_KERNEL b . TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) @@ -1453,7 +1570,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) addi r13, r13, 4 mtspr SPRN_HSRR0, r13 GET_SCRATCH0(r13) - hrfid + HRFI_TO_KERNEL b . #endif @@ -1521,7 +1638,7 @@ USE_TEXT_SECTION() .balign IFETCH_ALIGN_BYTES do_hash_page: #ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h + lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h ori r0,r0,DSISR_BAD_FAULT_64S@l and. r0,r4,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ @@ -1717,6 +1834,8 @@ BEGIN_FTR_SECTION FTR_SECTION_ELSE beq hardware_interrupt_common ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300) + cmpwi r3,0xf00 + beq performance_monitor_common BEGIN_FTR_SECTION cmpwi r3,0xa00 beq h_doorbell_common_msgclr diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 04ea5c04fd24..3c2c2688918f 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1462,25 +1462,6 @@ static void fadump_init_files(void) return; } -static int fadump_panic_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - /* - * If firmware-assisted dump has been registered then trigger - * firmware-assisted dump and let firmware handle everything - * else. If this returns, then fadump was not registered, so - * go through the rest of the panic path. 
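Stepping back to the rfi_flush_fallback/hrfi_flush_fallback trampolines added a little earlier: the register-level loop is easier to follow as C. This is only a model of the displacement flush (the real code has to run in assembly on the exception exit path), using the per-CPU values that setup_64.c computes later in this series: congruence is the L1D size divided by 8 ways, and sets is that divided by the 128-byte line size.

/* C model of the fallback L1D displacement-flush loop (illustration only). */
static void model_fallback_flush(const char *fallback_area,
				 unsigned long congruence, unsigned long sets)
{
	/* loads are staggered by 8 bytes per way, as in the asm (addi r12,r12,8) */
	unsigned long stride = congruence + 8;
	unsigned long set, way;

	for (set = 0; set < sets; set++) {
		const volatile char *p = fallback_area + set * 128;

		for (way = 0; way < 8; way++) {	/* assumes 8-way associativity */
			(void)*p;		/* displace whatever line was cached */
			p += stride;
		}
	}
}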
- */ - crash_fadump(NULL, ptr); - - return NOTIFY_DONE; -} - -static struct notifier_block fadump_panic_block = { - .notifier_call = fadump_panic_event, - .priority = INT_MIN /* may not return; must be done last */ -}; - /* * Prepare for firmware-assisted dump. */ @@ -1513,9 +1494,6 @@ int __init setup_fadump(void) init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); fadump_init_files(); - atomic_notifier_chain_register(&panic_notifier_list, - &fadump_panic_block); - return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index aa71a90f5222..a61151a6ea5e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -765,8 +765,8 @@ _GLOBAL(pmac_secondary_start) /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) + li r0,IRQS_DISABLED + stb r0,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -822,7 +822,8 @@ __secondary_start: /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - stb r7,PACASOFTIRQEN(r13) + li r7,IRQS_DISABLED + stb r7,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) @@ -988,8 +989,8 @@ start_here_common: /* Mark interrupts soft and hard disabled (they might be enabled * in the PACA when doing hotplug) */ - li r0,0 - stb r0,PACASOFTIRQEN(r13) + li r0,IRQS_DISABLED + stb r0,PACAIRQSOFTMASK(r13) li r0,PACA_IRQ_HARD_DIS stb r0,PACAIRQHAPPENED(r13) diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S index 48c21acef915..2b269315d377 100644 --- a/arch/powerpc/kernel/idle_book3e.S +++ b/arch/powerpc/kernel/idle_book3e.S @@ -17,6 +17,7 @@ #include <asm/processor.h> #include <asm/thread_info.h> #include <asm/epapr_hcalls.h> +#include <asm/hw_irq.h> /* 64-bit version only for now */ #ifdef CONFIG_PPC64 @@ -46,8 +47,8 @@ _GLOBAL(\name) bl trace_hardirqs_on addi r1,r1,128 #endif - li r0,1 - stb r0,PACASOFTIRQEN(r13) + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13) /* Interrupts will make use return to LR, so get something we want * in there diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S index f57a19348bdd..08faa93755f9 100644 --- a/arch/powerpc/kernel/idle_power4.S +++ b/arch/powerpc/kernel/idle_power4.S @@ -15,6 +15,7 @@ #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/irqflags.h> +#include <asm/hw_irq.h> #undef DEBUG @@ -53,8 +54,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP) mfmsr r7 #endif /* CONFIG_TRACE_IRQFLAGS */ - li r0,1 - stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */ + li r0,IRQS_ENABLED + stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */ BEGIN_FTR_SECTION DSSALL sync diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index b7a84522e652..f88038847790 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -67,6 +67,7 @@ #include <asm/smp.h> #include <asm/livepatch.h> #include <asm/asm-prototypes.h> +#include <asm/hw_irq.h> #ifdef CONFIG_PPC64 #include <asm/paca.h> @@ -106,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void) return happened; } -static inline notrace void set_soft_enabled(unsigned long enable) -{ - __asm__ __volatile__("stb %0,%1(13)" - : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); -} - static inline notrace int decrementer_check_overflow(void) { u64 now = get_tb_or_rtc(); @@ -191,6 +186,11 @@ notrace 
unsigned int __check_irq_replay(void) return 0x900; } + if (happened & PACA_IRQ_PMI) { + local_paca->irq_happened &= ~PACA_IRQ_PMI; + return 0xf00; + } + if (happened & PACA_IRQ_EE) { local_paca->irq_happened &= ~PACA_IRQ_EE; return 0x500; @@ -224,15 +224,16 @@ notrace unsigned int __check_irq_replay(void) return 0; } -notrace void arch_local_irq_restore(unsigned long en) +notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; unsigned int replay; /* Write the new soft-enabled value */ - set_soft_enabled(en); - if (!en) + irq_soft_mask_set(mask); + if (mask) return; + /* * From this point onward, we can take interrupts, preempt, * etc... unless we got hard-disabled. We check if an event @@ -263,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long en) */ if (unlikely(irq_happened != PACA_IRQ_HARD_DIS)) __hard_irq_disable(); -#ifdef CONFIG_TRACE_IRQFLAGS +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG else { /* * We should already be hard disabled here. We had bugs @@ -274,9 +275,9 @@ notrace void arch_local_irq_restore(unsigned long en) if (WARN_ON(mfmsr() & MSR_EE)) __hard_irq_disable(); } -#endif /* CONFIG_TRACE_IRQFLAGS */ +#endif - set_soft_enabled(0); + irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); /* @@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en) /* We can soft-enable now */ trace_hardirqs_on(); - set_soft_enabled(1); + irq_soft_mask_set(IRQS_ENABLED); /* * And replay if we have to. This will return with interrupts @@ -363,7 +364,7 @@ bool prep_irq_for_idle(void) * of entering the low power state. */ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS; - local_paca->soft_enabled = 1; + irq_soft_mask_set(IRQS_ENABLED); /* Tell the caller to enter the low power state */ return true; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 742e4658c5dc..d2fecaec4fec 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs) return handled; } -long hmi_exception_realmode(struct pt_regs *regs) +/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ +static enum { + DTRIG_UNKNOWN, + DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ + DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ +} hmer_debug_trig_function; + +static int init_debug_trig_function(void) { - __this_cpu_inc(irq_stat.hmi_exceptions); - -#ifdef CONFIG_PPC_BOOK3S_64 - /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ - if (pvr_version_is(PVR_POWER9)) { - unsigned long hmer = mfspr(SPRN_HMER); - - /* Do we have the debug bit set */ - if (hmer & PPC_BIT(17)) { - hmer &= ~PPC_BIT(17); - mtspr(SPRN_HMER, hmer); - - /* - * Now to avoid problems with soft-disable we - * only do the emulation if we are coming from - * user space - */ - if (user_mode(regs)) - local_paca->hmi_p9_special_emu = 1; - - /* - * Don't bother going to OPAL if that's the - * only relevant bit. 
- */ - if (!(hmer & mfspr(SPRN_HMEER))) - return local_paca->hmi_p9_special_emu; + int pvr; + struct device_node *cpun; + struct property *prop = NULL; + const char *str; + + /* First look in the device tree */ + preempt_disable(); + cpun = of_get_cpu_node(smp_processor_id(), NULL); + if (cpun) { + of_property_for_each_string(cpun, "ibm,hmi-special-triggers", + prop, str) { + if (strcmp(str, "bit17-vector-ci-load") == 0) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + else if (strcmp(str, "bit17-tm-suspend-escape") == 0) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; } + of_node_put(cpun); + } + preempt_enable(); + + /* If we found the property, don't look at PVR */ + if (prop) + goto out; + + pvr = mfspr(SPRN_PVR); + /* Check for POWER9 Nimbus (scale-out) */ + if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { + /* DD2.2 and later */ + if ((pvr & 0xfff) >= 0x202) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; + /* DD2.0 and DD2.1 - used for vector CI load emulation */ + else if ((pvr & 0xfff) >= 0x200) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + } + + out: + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + pr_debug("HMI debug trigger used for vector CI load\n"); + break; + case DTRIG_SUSPEND_ESCAPE: + pr_debug("HMI debug trigger used for TM suspend escape\n"); + break; + default: + break; } -#endif /* CONFIG_PPC_BOOK3S_64 */ + return 0; +} +__initcall(init_debug_trig_function); + +/* + * Handle HMIs that occur as a result of a debug trigger. + * Return values: + * -1 means this is not a HMI cause that we know about + * 0 means no further handling is required + * 1 means further handling is required + */ +long hmi_handle_debugtrig(struct pt_regs *regs) +{ + unsigned long hmer = mfspr(SPRN_HMER); + long ret = 0; + + /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ + if (!((hmer & HMER_DEBUG_TRIG) + && hmer_debug_trig_function != DTRIG_UNKNOWN)) + return -1; + + hmer &= ~HMER_DEBUG_TRIG; + /* HMER is a write-AND register */ + mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); + + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + /* + * Now to avoid problems with soft-disable we + * only do the emulation if we are coming from + * host user space + */ + if (regs && user_mode(regs)) + ret = local_paca->hmi_p9_special_emu = 1; + + break; + + default: + break; + } + + /* + * See if any other HMI causes remain to be handled + */ + if (hmer & mfspr(SPRN_HMEER)) + return -1; + + return ret; +} + +/* + * Return values: + */ +long hmi_exception_realmode(struct pt_regs *regs) +{ + int ret; + + __this_cpu_inc(irq_stat.hmi_exceptions); + + ret = hmi_handle_debugtrig(regs); + if (ret >= 0) + return ret; wait_for_subcore_guest_exit(); diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 52fc864cdec4..98a3aeeb3c8c 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -58,7 +58,7 @@ optprobe_template_entry: std r5,_XER(r1) mfcr r5 std r5,_CCR(r1) - lbz r5,PACASOFTIRQEN(r13) + lbz r5,PACAIRQSOFTMASK(r13) std r5,SOFTE(r1) /* diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bcd4441304a5..2fd01c9045ba 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -42,6 +42,7 @@ #include <linux/hw_breakpoint.h> #include <linux/uaccess.h> #include <linux/elf-randomize.h> +#include <linux/pkeys.h> #include <asm/pgtable.h> #include <asm/io.h> @@ -57,6 +58,7 @@ #include <asm/debug.h> #ifdef CONFIG_PPC64 #include <asm/firmware.h> +#include 
<asm/hw_irq.h> #endif #include <asm/code-patching.h> #include <asm/exec.h> @@ -1102,6 +1104,8 @@ static inline void save_sprs(struct thread_struct *t) t->tar = mfspr(SPRN_TAR); } #endif + + thread_pkey_regs_save(t); } static inline void restore_sprs(struct thread_struct *old_thread, @@ -1141,6 +1145,8 @@ static inline void restore_sprs(struct thread_struct *old_thread, old_thread->tidr != new_thread->tidr) mtspr(SPRN_TIDR, new_thread->tidr); #endif + + thread_pkey_regs_restore(new_thread, old_thread); } #ifdef CONFIG_PPC_BOOK3S_64 @@ -1403,7 +1409,7 @@ void show_regs(struct pt_regs * regs) printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); - printk("REGS: %p TRAP: %04lx %s (%s)\n", + printk("REGS: %px TRAP: %04lx %s (%s)\n", regs, regs->trap, print_tainted(), init_utsname()->release); printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); @@ -1589,6 +1595,7 @@ int set_thread_tidr(struct task_struct *t) return 0; } +EXPORT_SYMBOL_GPL(set_thread_tidr); #endif /* CONFIG_PPC64 */ @@ -1674,7 +1681,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childregs->gpr[14] = ppc_function_entry((void *)usp); #ifdef CONFIG_PPC64 clear_tsk_thread_flag(p, TIF_32BIT); - childregs->softe = 1; + childregs->softe = IRQS_ENABLED; #endif childregs->gpr[15] = kthread_arg; p->thread.regs = NULL; /* no user register state */ @@ -1865,6 +1872,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.tm_tfiar = 0; current->thread.load_tm = 0; #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ + + thread_pkey_regs_init(¤t->thread); } EXPORT_SYMBOL(start_thread); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index acf4b2e0530c..adf044daafd7 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -874,6 +874,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .mmu = 0, .hash_ext = 0, .radix_ext = 0, + .byte22 = OV5_FEAT(OV5_DRC_INFO), }, /* option vector 6: IBM PAPR hints */ diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index f52ad5bb7109..ca72d7391d40 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -35,6 +35,7 @@ #include <linux/context_tracking.h> #include <linux/uaccess.h> +#include <linux/pkeys.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/switch_to.h> @@ -283,6 +284,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data) if (regno == PT_DSCR) return get_user_dscr(task, data); +#ifdef CONFIG_PPC64 + /* + * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is + * no more used as a flag, lets force usr to alway see the softe value as 1 + * which means interrupts are not soft disabled. 
+ */ + if (regno == PT_SOFTE) { + *data = 1; + return 0; + } +#endif + if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { *data = ((unsigned long *)task->thread.regs)[regno]; return 0; @@ -1775,6 +1788,61 @@ static int pmu_set(struct task_struct *target, return ret; } #endif + +#ifdef CONFIG_PPC_MEM_KEYS +static int pkey_active(struct task_struct *target, + const struct user_regset *regset) +{ + if (!arch_pkeys_enabled()) + return -ENODEV; + + return regset->n; +} + +static int pkey_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr)); + BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor)); + + if (!arch_pkeys_enabled()) + return -ENODEV; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.amr, 0, + ELF_NPKEY * sizeof(unsigned long)); +} + +static int pkey_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + u64 new_amr; + int ret; + + if (!arch_pkeys_enabled()) + return -ENODEV; + + /* Only the AMR can be set from userspace */ + if (pos != 0 || count != sizeof(new_amr)) + return -EINVAL; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_amr, 0, sizeof(new_amr)); + if (ret) + return ret; + + /* UAMOR determines which bits of the AMR can be set from userspace. */ + target->thread.amr = (new_amr & target->thread.uamor) | + (target->thread.amr & ~target->thread.uamor); + + return 0; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + /* * These are our native regset flavors. */ @@ -1809,6 +1877,9 @@ enum powerpc_regset { REGSET_EBB, /* EBB registers */ REGSET_PMR, /* Performance Monitor Registers */ #endif +#ifdef CONFIG_PPC_MEM_KEYS + REGSET_PKEY, /* AMR register */ +#endif }; static const struct user_regset native_regsets[] = { @@ -1914,6 +1985,13 @@ static const struct user_regset native_regsets[] = { .active = pmu_active, .get = pmu_get, .set = pmu_set }, #endif +#ifdef CONFIG_PPC_MEM_KEYS + [REGSET_PKEY] = { + .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY, + .size = sizeof(u64), .align = sizeof(u64), + .active = pkey_active, .get = pkey_get, .set = pkey_set + }, +#endif }; static const struct user_regset_view user_ppc_native_view = { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 24da91768133..af128ee67248 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. */ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - #ifdef CONFIG_SMP pvr = per_cpu(cpu_pvr, cpu_id); #else @@ -356,8 +348,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #endif seq_printf(m, "\n"); - preempt_enable(); - /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); @@ -706,6 +696,30 @@ int check_legacy_ioport(unsigned long base_port) } EXPORT_SYMBOL(check_legacy_ioport); +static int ppc_panic_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + /* + * If firmware-assisted dump has been registered then trigger + * firmware-assisted dump and let firmware handle everything else. 
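Returning to the NT_PPC_PKEY regset added to ptrace above: a debugger can read the three protection-key register images (AMR, IAMR, UAMOR) of a stopped tracee with PTRACE_GETREGSET. A hedged sketch, assuming the toolchain's <elf.h> already carries the NT_PPC_PKEY note number:

#include <elf.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

static int dump_pkey_regs(pid_t pid)
{
	uint64_t regs[3];	/* ELF_NPKEY entries: amr, iamr, uamor */
	struct iovec iov = { .iov_base = regs, .iov_len = sizeof(regs) };

	if (ptrace(PTRACE_GETREGSET, pid, NT_PPC_PKEY, &iov) == -1)
		return -1;	/* fails when pkeys are not enabled */

	printf("AMR %#jx IAMR %#jx UAMOR %#jx\n",
	       (uintmax_t)regs[0], (uintmax_t)regs[1], (uintmax_t)regs[2]);
	return 0;
}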
+ */ + crash_fadump(NULL, ptr); + ppc_md.panic(ptr); /* May not return */ + return NOTIFY_DONE; +} + +static struct notifier_block ppc_panic_block = { + .notifier_call = ppc_panic_event, + .priority = INT_MIN /* may not return; must be done last */ +}; + +void __init setup_panic(void) +{ + if (!ppc_md.panic) + return; + atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); +} + #ifdef CONFIG_CHECK_CACHE_COHERENCY /* * For platforms that have configurable cache-coherency. This function @@ -850,6 +864,9 @@ void __init setup_arch(char **cmdline_p) /* Probe the machine type, establish ppc_md. */ probe_machine(); + /* Setup panic notifier if requested by the platform. */ + setup_panic(); + /* * Configure ppc_md.power_save (ppc32 only, 64-bit machines do * it from their respective probe() function. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8f285d6a3db1..d1fa0e91f526 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -36,6 +36,7 @@ #include <linux/memory.h> #include <linux/nmi.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -66,6 +67,7 @@ #include <asm/livepatch.h> #include <asm/opal.h> #include <asm/cputhreads.h> +#include <asm/hw_irq.h> #include "setup.h" @@ -187,6 +189,8 @@ static void __init fixup_boot_paca(void) get_paca()->cpu_start = 1; /* Allow percpu accesses to work until we setup percpu data */ get_paca()->data_offset = 0; + /* Mark interrupts disabled in PACA */ + irq_soft_mask_set(IRQS_DISABLED); } static void __init configure_exceptions(void) @@ -349,7 +353,7 @@ void __init early_setup(unsigned long dt_ptr) void early_setup_secondary(void) { /* Mark interrupts disabled in PACA */ - get_paca()->soft_enabled = 0; + irq_soft_mask_set(IRQS_DISABLED); /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); @@ -805,3 +809,141 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +static bool no_rfi_flush; +bool rfi_flush; + +static int __init handle_no_rfi_flush(char *p) +{ + pr_info("rfi-flush: disabled on command line."); + no_rfi_flush = true; + return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ + pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); + handle_no_rfi_flush(NULL); + return 0; +} +early_param("nopti", handle_no_pti); + +static void do_nothing(void *unused) +{ + /* + * We don't need to do the flush explicitly, just enter+exit kernel is + * sufficient, the RFI exit handlers will do the right thing. + */ +} + +void rfi_flush_enable(bool enable) +{ + if (rfi_flush == enable) + return; + + if (enable) { + do_rfi_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else + do_rfi_flush_fixups(L1D_FLUSH_NONE); + + rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ + u64 l1d_size, limit; + int cpu; + + l1d_size = ppc64_caches.l1d.size; + limit = min(ppc64_bolted_size(), ppc64_rma_size); + + /* + * Align to L1d size, and size it at 2x L1d size, to catch possible + * hardware prefetch runoff. We don't have a recipe for load patterns to + * reliably avoid the prefetcher. 
+ */ + l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); + memset(l1d_flush_fallback_area, 0, l1d_size * 2); + + for_each_possible_cpu(cpu) { + /* + * The fallback flush is currently coded for 8-way + * associativity. Different associativity is possible, but it + * will be treated as 8-way and may not evict the lines as + * effectively. + * + * 128 byte lines are mandatory. + */ + u64 c = l1d_size / 8; + + paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; + paca[cpu].l1d_flush_congruence = c; + paca[cpu].l1d_flush_sets = c / 128; + } +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ + if (types & L1D_FLUSH_FALLBACK) { + pr_info("rfi-flush: Using fallback displacement flush\n"); + init_fallback_flush(); + } + + if (types & L1D_FLUSH_ORI) + pr_info("rfi-flush: Using ori type flush\n"); + + if (types & L1D_FLUSH_MTTRIG) + pr_info("rfi-flush: Using mttrig type flush\n"); + + enabled_flush_types = types; + + if (!no_rfi_flush) + rfi_flush_enable(enable); +} + +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 9ffd73296f64..a30c6562ed66 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs, { elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; + /* Force usr to alway see softe as 1 (interrupts enabled) */ + elf_greg_t64 softe = 0x1; WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { if (i == 14 && !FULL_REGS(regs)) i = 32; + if ( i == PT_SOFTE) { + if(__put_user((unsigned int)softe, &frame->mc_gregs[i])) + return -EFAULT; + else + continue; + } if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i])) return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 4b9ca3570344..2705fba544ad 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs = tsk->thread.regs; unsigned long msr = regs->msr; long err = 0; + /* Force usr to alway see softe as 1 (interrupts enabled) */ + unsigned long softe = 0x1; BUG_ON(tsk != current); @@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); err |= __put_user(msr, &sc->gp_regs[PT_MSR]); + err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]); err |= __put_user(signr, &sc->signal); err |= __put_user(handler, &sc->handler); if (set != NULL) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fe6f3a285455..a32823dcd9a4 100644 --- 
a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb) void accumulate_stolen_time(void) { u64 sst, ust; - u8 save_soft_enabled = local_paca->soft_enabled; + unsigned long save_irq_soft_mask = irq_soft_mask_return(); struct cpu_accounting_data *acct = &local_paca->accounting; /* We are called early in the exception entry, before @@ -253,7 +253,7 @@ void accumulate_stolen_time(void) * needs to reflect that so various debug stuff doesn't * complain */ - local_paca->soft_enabled = 0; + irq_soft_mask_set(IRQS_DISABLED); sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); @@ -261,7 +261,7 @@ void accumulate_stolen_time(void) acct->utime -= ust; acct->steal_time += ust + sst; - local_paca->soft_enabled = save_soft_enabled; + irq_soft_mask_set(save_irq_soft_mask); } static inline u64 calculate_stolen_time(u64 stop_tb) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d139117d3339..122a3c883f4e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -20,6 +20,7 @@ #include <linux/sched/debug.h> #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/pkeys.h> #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> @@ -266,7 +267,9 @@ void user_single_step_siginfo(struct task_struct *tsk, info->si_addr = (void __user *)regs->nip; } -void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) + +void _exception_pkey(int signr, struct pt_regs *regs, int code, + unsigned long addr, int key) { siginfo_t info; const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \ @@ -289,13 +292,27 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) local_irq_enable(); current->thread.trap_nr = code; + + /* + * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need + * to capture the content, if the task gets killed. + */ + thread_pkey_regs_save(¤t->thread); + memset(&info, 0, sizeof(info)); info.si_signo = signr; info.si_code = code; info.si_addr = (void __user *) addr; + info.si_pkey = key; + force_sig_info(signr, &info, current); } +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + _exception_pkey(signr, regs, code, addr, 0); +} + void system_reset_exception(struct pt_regs *regs) { /* diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 51e4ec92ade1..6507138ebe10 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS /* Read-only data */ RO_DATA(PAGE_SIZE) +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { + __start___rfi_flush_fixup = .; + *(__rfi_flush_fixup) + __stop___rfi_flush_fixup = .; + } +#endif + EXCEPTION_TABLE(0) NOTES :kernel :notes diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3963baaba92d..0673230c0261 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -26,15 +26,45 @@ #include <asm/paca.h> /* - * The watchdog has a simple timer that runs on each CPU, once per timer - * period. This is the heartbeat. + * The powerpc watchdog ensures that each CPU is able to service timers. + * The watchdog sets up a simple timer on each CPU to run once per timer + * period, and updates a per-cpu timestamp and a "pending" cpumask. This is + * the heartbeat. 
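The _exception_pkey() change a few hunks above (info.si_pkey = key) is what lets userspace see which protection key blocked an access. A sketch of the consumer side, assuming a libc that defines SEGV_PKUERR and exposes si_pkey in siginfo_t:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void pkey_segv_handler(int sig, siginfo_t *info, void *ctx)
{
	(void)sig; (void)ctx;

	/* key-protection faults carry the blocking key in si_pkey */
	if (info->si_code == SEGV_PKUERR)
		fprintf(stderr, "pkey fault at %p, key %d\n",
			info->si_addr, (int)info->si_pkey);
	_exit(1);	/* fprintf is not async-signal-safe; demo only */
}

static void install_pkey_handler(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = pkey_segv_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);
}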
* - * Then there are checks to see if the heartbeat has not triggered on a CPU - * for the panic timeout period. Currently the watchdog only supports an - * SMP check, so the heartbeat only turns on when we have 2 or more CPUs. + * Then there are two systems to check that the heartbeat is still running. + * The local soft-NMI, and the SMP checker. * - * This is not an NMI watchdog, but Linux uses that name for a generic - * watchdog in some cases, so NMI gets used in some places. + * The soft-NMI checker can detect lockups on the local CPU. When interrupts + * are disabled with local_irq_disable(), platforms that use soft-masking + * can leave hardware interrupts enabled and handle them with a masked + * interrupt handler. The masked handler can send the timer interrupt to the + * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI + * interrupt, and can be used to detect CPUs stuck with IRQs disabled. + * + * The soft-NMI checker will compare the heartbeat timestamp for this CPU + * with the current time, and take action if the difference exceeds the + * watchdog threshold. + * + * The limitation of the soft-NMI watchdog is that it does not work when + * interrupts are hard disabled or otherwise not being serviced. This is + * solved by also having a SMP watchdog where all CPUs check all other + * CPUs heartbeat. + * + * The SMP checker can detect lockups on other CPUs. A gobal "pending" + * cpumask is kept, containing all CPUs which enable the watchdog. Each + * CPU clears their pending bit in their heartbeat timer. When the bitmask + * becomes empty, the last CPU to clear its pending bit updates a global + * timestamp and refills the pending bitmask. + * + * In the heartbeat timer, if any CPU notices that the global timestamp has + * not been updated for a period exceeding the watchdog threshold, then it + * means the CPU(s) with their bit still set in the pending mask have had + * their heartbeat stop, and action is taken. + * + * Some platforms implement true NMI IPIs, which can by used by the SMP + * watchdog to detect an unresponsive CPU and pull it out of its stuck + * state with the NMI IPI, to get crash/debug data from it. This way the + * SMP watchdog can detect hardware interrupts off lockups. */ static cpumask_t wd_cpus_enabled __read_mostly; @@ -47,19 +77,7 @@ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ static DEFINE_PER_CPU(struct timer_list, wd_timer); static DEFINE_PER_CPU(u64, wd_timer_tb); -/* - * These are for the SMP checker. CPUs clear their pending bit in their - * heartbeat. If the bitmask becomes empty, the time is noted and the - * bitmask is refilled. - * - * All CPUs clear their bit in the pending mask every timer period. - * Once all have cleared, the time is noted and the bits are reset. - * If the time since all clear was greater than the panic timeout, - * we can panic with the list of stuck CPUs. - * - * This will work best with NMI IPIs for crash code so the stuck CPUs - * can be pulled out to get their backtraces. - */ +/* SMP checker bits */ static unsigned long __wd_smp_lock; static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index e61066bb6725..b11043b23c18 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -266,17 +266,19 @@ static void kvmppc_tb_resync_done(void) * secondary threads to proceed. 
* - All secondary threads will eventually call opal hmi handler on * their exit path. + * + * Returns 1 if the timebase offset should be applied, 0 if not. */ long kvmppc_realmode_hmi_handler(void) { - int ptid = local_paca->kvm_hstate.ptid; bool resync_req; - /* This is only called on primary thread. */ - BUG_ON(ptid != 0); __this_cpu_inc(irq_stat.hmi_exceptions); + if (hmi_handle_debugtrig(NULL) >= 0) + return 1; + /* * By now primary thread has already completed guest->host * partition switch but haven't signaled secondaries yet. diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 26c11f678fbf..8888e625a999 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x) } /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ -static int global_invalidates(struct kvm *kvm, unsigned long flags) +static int global_invalidates(struct kvm *kvm) { int global; int cpu; @@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); rb = compute_tlbie_rb(v, pte_r, pte_index); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); /* * The reference (R) and change (C) bits in a HPT * entry can be set by hardware at any time up until @@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) if (kvm_is_radix(kvm)) return H_FUNCTION; - global = global_invalidates(kvm, 0); + global = global_invalidates(kvm); for (i = 0; i < 4 && ret == H_SUCCESS; ) { n = 0; for (; i < 4; ++i) { @@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rb = compute_tlbie_rb(v, r, pte_index); hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | HPTE_V_ABSENT); - do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), - true); + do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true); /* Don't lose R/C bit updates done by hardware */ r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2659844784b8..7886b313d135 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline) mtmsrd r0,1 /* clear RI in MSR */ mtsrr0 r5 mtsrr1 r6 - RFI + RFI_TO_KERNEL kvmppc_call_hv_entry: BEGIN_FTR_SECTION @@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - RFI + RFI_TO_KERNEL /* Virtual-mode return */ .Lvirt_return: @@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r0, VCPU_GPR(R0)(r4) ld r4, VCPU_GPR(R4)(r4) - - hrfid + HRFI_TO_GUEST b . secondary_too_late: @@ -1909,16 +1908,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) bne 27f bl kvmppc_realmode_hmi_handler nop + cmpdi r3, 0 li r12, BOOK3S_INTERRUPT_HMI /* - * At this point kvmppc_realmode_hmi_handler would have resync-ed - * the TB. Hence it is not required to subtract guest timebase - * offset from timebase. So, skip it. + * At this point kvmppc_realmode_hmi_handler may have resync-ed + * the TB, and if it has, we must not subtract the guest timebase + * offset from the timebase. So, skip it. * * Also, do not call kvmppc_subcore_exit_guest() because it has * been invoked as part of kvmppc_realmode_hmi_handler(). 
*/ - b 30f + beq 30f 27: /* Subtract timebase offset from timebase */ @@ -3249,7 +3249,7 @@ kvmppc_bad_host_intr: mfctr r4 #endif mfxer r5 - lbz r6, PACASOFTIRQEN(r13) + lbz r6, PACAIRQSOFTMASK(r13) std r3, _LINK(r1) std r4, _CTR(r1) std r5, _XER(r1) @@ -3320,7 +3320,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) ld r4, PACAKMSR(r13) mtspr SPRN_SRR0, r3 mtspr SPRN_SRR1, r4 - rfid + RFI_TO_KERNEL 9: addi r3, r1, STACK_FRAME_OVERHEAD bl kvmppc_bad_interrupt b 9b diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 42a4b237df5f..34a5adeff084 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -46,6 +46,9 @@ #define FUNC(name) name +#define RFI_TO_KERNEL RFI +#define RFI_TO_GUEST RFI + .macro INTERRUPT_TRAMPOLINE intno .global kvmppc_trampoline_\intno @@ -141,7 +144,7 @@ kvmppc_handler_skip_ins: GET_SCRATCH0(r13) /* And get back into the code */ - RFI + RFI_TO_KERNEL #endif /* @@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline) ori r5, r5, MSR_EE mtsrr0 r7 mtsrr1 r6 - RFI + RFI_TO_KERNEL #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 2a2b96d53999..93a180ceefad 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -156,7 +156,7 @@ no_dcbz32_on: PPC_LL r9, SVCPU_R9(r3) PPC_LL r3, (SVCPU_R3)(r3) - RFI + RFI_TO_GUEST kvmppc_handler_trampoline_enter_end: @@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) cmpwi r12, BOOK3S_INTERRUPT_DOORBELL beqa BOOK3S_INTERRUPT_DOORBELL - RFI + RFI_TO_KERNEL kvmppc_handler_trampoline_exit_end: diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..0d750d274c4e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) /* Return the per-cpu state for state saving/migration */ return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | - (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; + (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | + (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; } int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) @@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) /* * Restore P and Q. If the interrupt was pending, we - * force both P and Q, which will trigger a resend. + * force Q and !P, which will trigger a resend. * * That means that a guest that had both an interrupt * pending (queued) and Q set will restore with only @@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) * is perfectly fine as coalescing interrupts that haven't * been presented yet is always allowed. 
*/ - if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) + if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) state->old_p = true; if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) state->old_q = true; diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 096d4e4d31e6..e0d881ab304e 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -23,19 +23,26 @@ #include <asm/code-patching.h> #include <asm/setup.h> -static int __patch_instruction(unsigned int *addr, unsigned int instr) +static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, + unsigned int *patch_addr) { int err; - __put_user_size(instr, addr, 4, err); + __put_user_size(instr, patch_addr, 4, err); if (err) return err; - asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr)); + asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr), + "r" (exec_addr)); return 0; } +int raw_patch_instruction(unsigned int *addr, unsigned int instr) +{ + return __patch_instruction(addr, instr, addr); +} + #ifdef CONFIG_STRICT_KERNEL_RWX static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); @@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr) int patch_instruction(unsigned int *addr, unsigned int instr) { int err; - unsigned int *dest = NULL; + unsigned int *patch_addr = NULL; unsigned long flags; unsigned long text_poke_addr; unsigned long kaddr = (unsigned long)addr; @@ -148,8 +155,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr) * when text_poke_area is not ready, but we still need * to allow patching. We just do the plain old patching */ - if (!this_cpu_read(*PTRRELOC(&text_poke_area))) - return __patch_instruction(addr, instr); + if (!this_cpu_read(text_poke_area)) + return raw_patch_instruction(addr, instr); local_irq_save(flags); @@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr) goto out; } - dest = (unsigned int *)(text_poke_addr) + + patch_addr = (unsigned int *)(text_poke_addr) + ((kaddr & ~PAGE_MASK) / sizeof(unsigned int)); - /* - * We use __put_user_size so that we can handle faults while - * writing to dest and return err to handle faults gracefully - */ - __put_user_size(instr, dest, 4, err); - if (!err) - asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync" - ::"r" (dest), "r"(addr)); + __patch_instruction(addr, instr, patch_addr); err = unmap_patch_area(text_poke_addr); if (err) @@ -184,7 +184,7 @@ out: int patch_instruction(unsigned int *addr, unsigned int instr) { - return __patch_instruction(addr, instr); + return raw_patch_instruction(addr, instr); } #endif /* CONFIG_STRICT_KERNEL_RWX */ diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 41cf5ae273cf..73697c4e3468 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -62,7 +62,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest, } } - patch_instruction(dest, instr); + raw_patch_instruction(dest, instr); return 0; } @@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) } for (; dest < end; dest++) - patch_instruction(dest, PPC_INST_NOP); + raw_patch_instruction(dest, PPC_INST_NOP); return 0; } @@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int 
i; + + start = PTRRELOC(&__start___rfi_flush_fixup), + end = PTRRELOC(&__stop___rfi_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + if (types & L1D_FLUSH_FALLBACK) + /* b .+16 to fallback flush */ + instrs[0] = 0x48000010; + + i = 0; + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ + void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) { long *start, *end; @@ -129,7 +170,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end) for (; start < end; start++) { dest = (void *)start + *start; - patch_instruction(dest, PPC_INST_LWSYNC); + raw_patch_instruction(dest, PPC_INST_LWSYNC); } } @@ -147,7 +188,7 @@ static void do_final_fixups(void) length = (__end_interrupts - _stext) / sizeof(int); while (length--) { - patch_instruction(dest, *src); + raw_patch_instruction(dest, *src); src++; dest++; } diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 8d271bfe2d94..f06f3577d8d1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o +obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index c07896c19517..866446cf2d9a 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs) */ static int -__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code, + int pkey) { /* * If we are in kernel mode, bail out with a SEGV, this will @@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) if (!user_mode(regs)) return SIGSEGV; - _exception(SIGSEGV, regs, si_code, address); + _exception_pkey(SIGSEGV, regs, si_code, address, pkey); return 0; } static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address) { - return __bad_area_nosemaphore(regs, address, SEGV_MAPERR); + return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0); } -static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) +static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code, + int pkey) { struct mm_struct *mm = current->mm; @@ -137,12 +139,23 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) */ up_read(&mm->mmap_sem); - return __bad_area_nosemaphore(regs, address, si_code); + return __bad_area_nosemaphore(regs, address, si_code, pkey); } static noinline int bad_area(struct pt_regs *regs, unsigned long address) { - return __bad_area(regs, address, SEGV_MAPERR); + return __bad_area(regs, address, SEGV_MAPERR, 0); +} + +static int 
bad_key_fault_exception(struct pt_regs *regs, unsigned long address, + int pkey) +{ + return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey); +} + +static noinline int bad_access(struct pt_regs *regs, unsigned long address) +{ + return __bad_area(regs, address, SEGV_ACCERR, 0); } static int do_sigbus(struct pt_regs *regs, unsigned long address, @@ -427,6 +440,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + if (error_code & DSISR_KEYFAULT) + return bad_key_fault_exception(regs, address, + get_mm_addr_key(mm, address)); + /* * We want to do this outside mmap_sem, because reading code around nip * can result in fault, which will cause a deadlock when called with @@ -490,7 +507,7 @@ retry: good_area: if (unlikely(access_error(is_write, is_exec, vma))) - return bad_area(regs, address); + return bad_access(regs, address); /* * If for any reason at all we couldn't handle the fault, @@ -498,6 +515,31 @@ good_area: * the fault. */ fault = handle_mm_fault(vma, address, flags); + +#ifdef CONFIG_PPC_MEM_KEYS + /* + * if the HPTE is not hashed, hardware will not detect + * a key fault. Lets check if we failed because of a + * software detected key fault. + */ + if (unlikely(fault & VM_FAULT_SIGSEGV) && + !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + is_exec, 0)) { + /* + * The PGD-PDT...PMD-PTE tree may not have been fully setup. + * Hence we cannot walk the tree to locate the PTE, to locate + * the key. Hence let's use vma_pkey() to get the key; instead + * of get_mm_addr_key(). + */ + int pkey = vma_pkey(vma); + + if (likely(pkey)) { + up_read(&mm->mmap_sem); + return bad_key_fault_exception(regs, address, pkey); + } + } +#endif /* CONFIG_PPC_MEM_KEYS */ + major |= fault & VM_FAULT_MAJOR; /* diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 64e704eefad1..462c34e7b01d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -36,6 +36,7 @@ #include <linux/memblock.h> #include <linux/context_tracking.h> #include <linux/libfdt.h> +#include <linux/pkeys.h> #include <asm/debugfs.h> #include <asm/processor.h> @@ -232,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) */ rflags |= HPTE_R_M; + rflags |= pte_to_hpte_pkey_bits(pteflags); return rflags; } @@ -1579,6 +1581,30 @@ out_exit: local_irq_restore(flags); } +#ifdef CONFIG_PPC_MEM_KEYS +/* + * Return the protection key associated with the given address and the + * mm_struct. + */ +u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address) +{ + pte_t *ptep; + u16 pkey = 0; + unsigned long flags; + + if (!mm || !mm->pgd) + return 0; + + local_irq_save(flags); + ptep = find_linux_pte(mm->pgd, address, NULL, NULL); + if (ptep) + pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep))); + local_irq_restore(flags); + + return pkey; +} +#endif /* CONFIG_PPC_MEM_KEYS */ + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline void tm_flush_hash_page(int local) { diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index f5d1008f8574..876da2bc1796 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page) * So long as we atomically load page table pointers we are safe against teardown, * we can follow the address down to the the page and take a ref on it. * This function need to be called with interrupts disabled. 
We use this variant - * when we have MSR[EE] = 0 but the paca->soft_enabled = 1 + * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED */ pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, bool *is_thp, unsigned *hpage_shift) diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 59c0766ae4e0..929d9ef7083f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/mm.h> +#include <linux/pkeys.h> #include <linux/spinlock.h> #include <linux/idr.h> #include <linux/export.h> @@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm) subpage_prot_init_new_context(mm); + pkey_mm_init(mm); return index; } diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c new file mode 100644 index 000000000000..ba71c5481f42 --- /dev/null +++ b/arch/powerpc/mm/pkeys.c @@ -0,0 +1,468 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * PowerPC Memory Protection Keys management + * + * Copyright 2017, Ram Pai, IBM Corporation. + */ + +#include <asm/mman.h> +#include <asm/setup.h> +#include <linux/pkeys.h> +#include <linux/of_device.h> + +DEFINE_STATIC_KEY_TRUE(pkey_disabled); +bool pkey_execute_disable_supported; +int pkeys_total; /* Total pkeys as per device tree */ +bool pkeys_devtree_defined; /* pkey property exported by device tree */ +u32 initial_allocation_mask; /* Bits set for reserved keys */ +u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */ +u64 pkey_iamr_mask; /* Bits in AMR not to be touched */ + +#define AMR_BITS_PER_PKEY 2 +#define AMR_RD_BIT 0x1UL +#define AMR_WR_BIT 0x2UL +#define IAMR_EX_BIT 0x1UL +#define PKEY_REG_BITS (sizeof(u64)*8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY)) + +static void scan_pkey_feature(void) +{ + u32 vals[2]; + struct device_node *cpu; + + cpu = of_find_node_by_type(NULL, "cpu"); + if (!cpu) + return; + + if (of_property_read_u32_array(cpu, + "ibm,processor-storage-keys", vals, 2)) + return; + + /* + * Since any pkey can be used for data or execute, we will just treat + * all keys as equal and track them as one entity. + */ + pkeys_total = be32_to_cpu(vals[0]); + pkeys_devtree_defined = true; +} + +static inline bool pkey_mmu_enabled(void) +{ + if (firmware_has_feature(FW_FEATURE_LPAR)) + return pkeys_total; + else + return cpu_has_feature(CPU_FTR_PKEY); +} + +int pkey_initialize(void) +{ + int os_reserved, i; + + /* + * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral + * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. + * Ensure that the bits a distinct. + */ + BUILD_BUG_ON(PKEY_DISABLE_EXECUTE & + (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + /* + * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous + * in the vmaflag. Make sure that is really the case. + */ + BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + + __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + != (sizeof(u64) * BITS_PER_BYTE)); + + /* scan the device tree for pkey feature */ + scan_pkey_feature(); + + /* + * Let's assume 32 pkeys on P8 bare metal, if its not defined by device + * tree. We make this exception since skiboot forgot to expose this + * property on power8. 
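+ * + * Note that this fallback only applies to bare metal: in pkey_mmu_enabled() + * above, an LPAR whose device tree lacks "ibm,processor-storage-keys" leaves + * pkeys_total at zero, so protection keys stay disabled there.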
+ */ + if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) && + cpu_has_feature(CPU_FTRS_POWER8)) + pkeys_total = 32; + + /* + * Adjust the upper limit, based on the number of bits supported by + * arch-neutral code. + */ + pkeys_total = min_t(int, pkeys_total, + (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)); + + if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total) + static_branch_enable(&pkey_disabled); + else + static_branch_disable(&pkey_disabled); + + if (static_branch_likely(&pkey_disabled)) + return 0; + + /* + * The device tree cannot be relied to indicate support for + * execute_disable support. Instead we use a PVR check. + */ + if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p)) + pkey_execute_disable_supported = false; + else + pkey_execute_disable_supported = true; + +#ifdef CONFIG_PPC_4K_PAGES + /* + * The OS can manage only 8 pkeys due to its inability to represent them + * in the Linux 4K PTE. + */ + os_reserved = pkeys_total - 8; +#else + os_reserved = 0; +#endif + /* + * Bits are in LE format. NOTE: 1, 0 are reserved. + * key 0 is the default key, which allows read/write/execute. + * key 1 is recommended not to be used. PowerISA(3.0) page 1015, + * programming note. + */ + initial_allocation_mask = ~0x0; + + /* register mask is in BE format */ + pkey_amr_uamor_mask = ~0x0ul; + pkey_iamr_mask = ~0x0ul; + + for (i = 2; i < (pkeys_total - os_reserved); i++) { + initial_allocation_mask &= ~(0x1 << i); + pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i)); + pkey_iamr_mask &= ~(0x1ul << pkeyshift(i)); + } + return 0; +} + +arch_initcall(pkey_initialize); + +void pkey_mm_init(struct mm_struct *mm) +{ + if (static_branch_likely(&pkey_disabled)) + return; + mm_pkey_allocation_map(mm) = initial_allocation_mask; + /* -1 means unallocated or invalid */ + mm->context.execute_only_pkey = -1; +} + +static inline u64 read_amr(void) +{ + return mfspr(SPRN_AMR); +} + +static inline void write_amr(u64 value) +{ + mtspr(SPRN_AMR, value); +} + +static inline u64 read_iamr(void) +{ + if (!likely(pkey_execute_disable_supported)) + return 0x0UL; + + return mfspr(SPRN_IAMR); +} + +static inline void write_iamr(u64 value) +{ + if (!likely(pkey_execute_disable_supported)) + return; + + mtspr(SPRN_IAMR, value); +} + +static inline u64 read_uamor(void) +{ + return mfspr(SPRN_UAMOR); +} + +static inline void write_uamor(u64 value) +{ + mtspr(SPRN_UAMOR, value); +} + +static bool is_pkey_enabled(int pkey) +{ + u64 uamor = read_uamor(); + u64 pkey_bits = 0x3ul << pkeyshift(pkey); + u64 uamor_pkey_bits = (uamor & pkey_bits); + + /* + * Both the bits in UAMOR corresponding to the key should be set or + * reset. 
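+ * A mismatched pair (only one of the two UAMOR bits set) is flagged by the + * WARN_ON below; the key is still reported as enabled in that case, since + * uamor_pkey_bits is non-zero.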
+ */ + WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits)); + return !!(uamor_pkey_bits); +} + +static inline void init_amr(int pkey, u8 init_bits) +{ + u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); + u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); + + write_amr(old_amr | new_amr_bits); +} + +static inline void init_iamr(int pkey, u8 init_bits) +{ + u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); + u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); + + write_iamr(old_iamr | new_iamr_bits); +} + +static void pkey_status_change(int pkey, bool enable) +{ + u64 old_uamor; + + /* Reset the AMR and IAMR bits for this key */ + init_amr(pkey, 0x0); + init_iamr(pkey, 0x0); + + /* Enable/disable key */ + old_uamor = read_uamor(); + if (enable) + old_uamor |= (0x3ul << pkeyshift(pkey)); + else + old_uamor &= ~(0x3ul << pkeyshift(pkey)); + write_uamor(old_uamor); +} + +void __arch_activate_pkey(int pkey) +{ + pkey_status_change(pkey, true); +} + +void __arch_deactivate_pkey(int pkey) +{ + pkey_status_change(pkey, false); +} + +/* + * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that + * specified in @init_val. + */ +int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + u64 new_amr_bits = 0x0ul; + u64 new_iamr_bits = 0x0ul; + + if (!is_pkey_enabled(pkey)) + return -EINVAL; + + if (init_val & PKEY_DISABLE_EXECUTE) { + if (!pkey_execute_disable_supported) + return -EINVAL; + new_iamr_bits |= IAMR_EX_BIT; + } + init_iamr(pkey, new_iamr_bits); + + /* Set the bits we need in AMR: */ + if (init_val & PKEY_DISABLE_ACCESS) + new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT; + else if (init_val & PKEY_DISABLE_WRITE) + new_amr_bits |= AMR_WR_BIT; + + init_amr(pkey, new_amr_bits); + return 0; +} + +void thread_pkey_regs_save(struct thread_struct *thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + /* + * TODO: Skip saving registers if @thread hasn't used any keys yet. + */ + thread->amr = read_amr(); + thread->iamr = read_iamr(); + thread->uamor = read_uamor(); +} + +void thread_pkey_regs_restore(struct thread_struct *new_thread, + struct thread_struct *old_thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + /* + * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet. + */ + if (old_thread->amr != new_thread->amr) + write_amr(new_thread->amr); + if (old_thread->iamr != new_thread->iamr) + write_iamr(new_thread->iamr); + if (old_thread->uamor != new_thread->uamor) + write_uamor(new_thread->uamor); +} + +void thread_pkey_regs_init(struct thread_struct *thread) +{ + if (static_branch_likely(&pkey_disabled)) + return; + + write_amr(read_amr() & pkey_amr_uamor_mask); + write_iamr(read_iamr() & pkey_iamr_mask); + write_uamor(read_uamor() & pkey_amr_uamor_mask); +} + +static inline bool pkey_allows_readwrite(int pkey) +{ + int pkey_shift = pkeyshift(pkey); + + if (!is_pkey_enabled(pkey)) + return true; + + return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift)); +} + +int __execute_only_pkey(struct mm_struct *mm) +{ + bool need_to_set_mm_pkey = false; + int execute_only_pkey = mm->context.execute_only_pkey; + int ret; + + /* Do we need to assign a pkey for mm's execute-only maps? 
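+ * (pkey_mm_init() initialises execute_only_pkey to -1, so -1 here means + * no execute-only key has been allocated for this mm yet.)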
*/ + if (execute_only_pkey == -1) { + /* Go allocate one to use, which might fail */ + execute_only_pkey = mm_pkey_alloc(mm); + if (execute_only_pkey < 0) + return -1; + need_to_set_mm_pkey = true; + } + + /* + * We do not want to go through the relatively costly dance to set AMR + * if we do not need to. Check it first and assume that if the + * execute-only pkey is readwrite-disabled than we do not have to set it + * ourselves. + */ + if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey)) + return execute_only_pkey; + + /* + * Set up AMR so that it denies access for everything other than + * execution. + */ + ret = __arch_set_user_pkey_access(current, execute_only_pkey, + PKEY_DISABLE_ACCESS | + PKEY_DISABLE_WRITE); + /* + * If the AMR-set operation failed somehow, just return 0 and + * effectively disable execute-only support. + */ + if (ret) { + mm_pkey_free(mm, execute_only_pkey); + return -1; + } + + /* We got one, store it and use it from here on out */ + if (need_to_set_mm_pkey) + mm->context.execute_only_pkey = execute_only_pkey; + return execute_only_pkey; +} + +static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) +{ + /* Do this check first since the vm_flags should be hot */ + if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC) + return false; + + return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey); +} + +/* + * This should only be called for *plain* mprotect calls. + */ +int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, + int pkey) +{ + /* + * If the currently associated pkey is execute-only, but the requested + * protection requires read or write, move it back to the default pkey. + */ + if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE))) + return 0; + + /* + * The requested protection is execute-only. Hence let's use an + * execute-only pkey. + */ + if (prot == PROT_EXEC) { + pkey = execute_only_pkey(vma->vm_mm); + if (pkey > 0) + return pkey; + } + + /* Nothing to override. */ + return vma_pkey(vma); +} + +static bool pkey_access_permitted(int pkey, bool write, bool execute) +{ + int pkey_shift; + u64 amr; + + if (!pkey) + return true; + + if (!is_pkey_enabled(pkey)) + return true; + + pkey_shift = pkeyshift(pkey); + if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift))) + return true; + + amr = read_amr(); /* Delay reading amr until absolutely needed */ + return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) || + (write && !(amr & (AMR_WR_BIT << pkey_shift)))); +} + +bool arch_pte_access_permitted(u64 pte, bool write, bool execute) +{ + if (static_branch_likely(&pkey_disabled)) + return true; + + return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute); +} + +/* + * We only want to enforce protection keys on the current thread because we + * effectively have no access to AMR/IAMR for other threads or any way to tell + * which AMR/IAMR in a threaded process we could use. + * + * So do not enforce things if the VMA is not from the current mm, or if we are + * in a kernel thread. + */ +static inline bool vma_is_foreign(struct vm_area_struct *vma) +{ + if (!current->mm) + return true; + + /* if it is not our ->mm, it has to be foreign */ + if (current->mm != vma->vm_mm) + return true; + + return false; +} + +bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, + bool execute, bool foreign) +{ + if (static_branch_likely(&pkey_disabled)) + return true; + /* + * Do not enforce our key-permissions on a foreign vma. 
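+ * A foreign vma is one that does not belong to current->mm, or any vma when + * running in a kernel thread (see vma_is_foreign() above); we have no way + * to read that context's AMR/IAMR, so the access is permitted.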
+ */ + if (foreign || vma_is_foreign(vma)) + return true; + + return pkey_access_permitted(vma_pkey(vma), write, execute); +} diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 781532d7bc4d..f14a07c2fb90 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) unsigned long next, limit; int err; + if (radix_enabled()) + return -ENOENT; + /* Check parameters */ if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) || addr >= mm->task_size || len >= mm->task_size || diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 9e3da168d54c..f89bbd54ecec 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs) */ static inline int perf_intr_is_nmi(struct pt_regs *regs) { - return !regs->softe; + return (regs->softe & IRQS_DISABLED); } /* @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr) int ret; __u64 target; - if (is_kernel_addr(addr)) - return branch_target((unsigned int *)addr); + if (is_kernel_addr(addr)) { + if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) + return 0; + + return branch_target(&instr); + } /* Userspace: need copy instruction here then translate it */ pagefault_disable(); @@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count, int n = 0; struct perf_event *event; - if (!is_software_event(group)) { + if (group->pmu->task_ctx_nr == perf_hw_context) { if (n >= max_count) return -1; ctrs[n] = group; @@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count, events[n++] = group->hw.config; } list_for_each_entry(event, &group->sibling_list, group_entry) { - if (!is_software_event(event) && + if (event->pmu->task_ctx_nr == perf_hw_context && event->state != PERF_EVENT_STATE_OFF) { if (n >= max_count) return -1; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..d7532e7b9ab5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -40,7 +40,6 @@ static struct imc_pmu *core_imc_pmu; /* Thread IMC data structures and variables */ static DEFINE_PER_CPU(u64 *, thread_imc_mem); -static struct imc_pmu *thread_imc_pmu; static int thread_imc_mem_size; struct imc_pmu *imc_event_to_pmu(struct perf_event *event) @@ -117,17 +116,13 @@ static struct attribute *device_str_attr_create(const char *name, const char *st return &attr->attr.attr; } -struct imc_events *imc_parse_event(struct device_node *np, const char *scale, - const char *unit, const char *prefix, u32 base) +static int imc_parse_event(struct device_node *np, const char *scale, + const char *unit, const char *prefix, + u32 base, struct imc_events *event) { - struct imc_events *event; const char *s; u32 reg; - event = kzalloc(sizeof(struct imc_events), GFP_KERNEL); - if (!event) - return NULL; - if (of_property_read_u32(np, "reg", ®)) goto error; /* Add the base_reg value to the "reg" */ @@ -158,14 +153,32 @@ struct imc_events *imc_parse_event(struct device_node *np, const char *scale, goto error; } - return event; + return 0; error: kfree(event->unit); kfree(event->scale); kfree(event->name); - kfree(event); + return -EINVAL; +} + +/* + * imc_free_events: Function to cleanup the events list, having + * "nr_entries". 
+ */ +static void imc_free_events(struct imc_events *events, int nr_entries) +{ + int i; + + /* Nothing to clean, return */ + if (!events) + return; + for (i = 0; i < nr_entries; i++) { + kfree(events[i].unit); + kfree(events[i].scale); + kfree(events[i].name); + } - return NULL; + kfree(events); } /* @@ -177,9 +190,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) struct attribute_group *attr_group; struct attribute **attrs, *dev_str; struct device_node *np, *pmu_events; - struct imc_events *ev; u32 handle, base_reg; - int i=0, j=0, ct; + int i = 0, j = 0, ct, ret; const char *prefix, *g_scale, *g_unit; const char *ev_val_str, *ev_scale_str, *ev_unit_str; @@ -217,15 +229,17 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) ct = 0; /* Parse the events and update the struct */ for_each_child_of_node(pmu_events, np) { - ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg); - if (ev) - pmu->events[ct++] = ev; + ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]); + if (!ret) + ct++; } /* Allocate memory for attribute group */ attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL); - if (!attr_group) + if (!attr_group) { + imc_free_events(pmu->events, ct); return -ENOMEM; + } /* * Allocate memory for attributes. @@ -238,31 +252,31 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL); if (!attrs) { kfree(attr_group); - kfree(pmu->events); + imc_free_events(pmu->events, ct); return -ENOMEM; } attr_group->name = "events"; attr_group->attrs = attrs; do { - ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value); - dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str); + ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value); + dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str); if (!dev_str) continue; attrs[j++] = dev_str; - if (pmu->events[i]->scale) { - ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name); - dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale); + if (pmu->events[i].scale) { + ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name); + dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale); if (!dev_str) continue; attrs[j++] = dev_str; } - if (pmu->events[i]->unit) { - ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name); - dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit); + if (pmu->events[i].unit) { + ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name); + dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit); if (!dev_str) continue; @@ -273,7 +287,6 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) /* Save the event attribute */ pmu->attr_groups[IMC_EVENT_ATTR] = attr_group; - kfree(pmu->events); return 0; } @@ -310,6 +323,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) return 0; /* + * Check whether nest_imc is registered. We could end up here if the + * cpuhotplug callback registration fails. i.e, callback invokes the + * offline path for all successfully registered nodes. At this stage, + * nest_imc pmu will not be registered and we should return here. + * + * We return with a zero since this is not an offline failure. And + * cpuhp_setup_state() returns the actual failure reason to the caller, + * which in turn will call the cleanup routine. 
+ */ + if (!nest_pmus) + return 0; + + /* * Now that this cpu is one of the designated, * find a next cpu a) which is online and b) in same chip. */ @@ -598,7 +624,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu) static int ppc_core_imc_cpu_offline(unsigned int cpu) { - unsigned int ncpu, core_id; + unsigned int core_id; + int ncpu; struct imc_pmu_ref *ref; /* @@ -1158,6 +1185,15 @@ static void cleanup_all_thread_imc_memory(void) } } +/* Function to free the attr_groups which are dynamically allocated */ +static void imc_common_mem_free(struct imc_pmu *pmu_ptr) +{ + if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); + kfree(pmu_ptr); +} + /* * Common function to unregister cpu hotplug callback and * free the memory. @@ -1171,6 +1207,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1189,14 +1226,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); cleanup_all_thread_imc_memory(); } - - /* Only free the attr_groups which are dynamically allocated */ - if (pmu_ptr->attr_groups[IMC_EVENT_ATTR]) - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); - kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); - kfree(pmu_ptr); - kfree(per_nest_pmu_arr); - return; } @@ -1245,8 +1274,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref), GFP_KERNEL); - if (!core_imc_refc) + if (!core_imc_refc) { + kfree(pmu_ptr->mem_info); return -ENOMEM; + } core_imc_pmu = pmu_ptr; break; @@ -1259,11 +1290,12 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, thread_imc_mem_size = pmu_ptr->counter_mem_size; for_each_online_cpu(cpu) { res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size); - if (res) + if (res) { + cleanup_all_thread_imc_memory(); return res; + } } - thread_imc_pmu = pmu_ptr; break; default: return -EINVAL; @@ -1287,8 +1319,10 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id int ret; ret = imc_mem_init(pmu_ptr, parent, pmu_idx); - if (ret) - goto err_free; + if (ret) { + imc_common_mem_free(pmu_ptr); + return ret; + } switch (pmu_ptr->domain) { case IMC_DOMAIN_NEST: @@ -1309,6 +1343,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } @@ -1353,6 +1389,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id return 0; err_free: + imc_common_mem_free(pmu_ptr); imc_common_cpuhp_mem_free(pmu_ptr); return ret; } diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 5ffcdeb1eb17..94139135be9c 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2375,8 +2375,8 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n) p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE; - return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n", - (unsigned int) p->tstamp.tv_sec, + return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n", + (unsigned long long) p->tstamp.tv_sec, (unsigned int) 
p->tstamp.tv_nsec, p->spu_id, (unsigned int) p->type, @@ -2499,7 +2499,7 @@ void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx, struct switch_log_entry *p; p = ctx->switch_log->log + ctx->switch_log->head; - ktime_get_ts(&p->tstamp); + ktime_get_ts64(&p->tstamp); p->timebase = get_tb(); p->spu_id = spu ? spu->number : -1; p->type = type; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 5e59f80e95db..5d85c689c2e9 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -69,7 +69,7 @@ struct switch_log { unsigned long head; unsigned long tail; struct switch_log_entry { - struct timespec tstamp; + struct timespec64 tstamp; s32 spu_id; u32 type; u32 val; diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 465ea105b771..dd4c9b8b8a81 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -21,6 +21,78 @@ #include <asm/io.h> #include <asm/imc-pmu.h> #include <asm/cputhreads.h> +#include <asm/debugfs.h> + +static struct dentry *imc_debugfs_parent; + +/* Helpers to export imc command and mode via debugfs */ +static int imc_mem_get(void *data, u64 *val) +{ + *val = cpu_to_be64(*(u64 *)data); + return 0; +} + +static int imc_mem_set(void *data, u64 val) +{ + *(u64 *)data = cpu_to_be64(val); + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n"); + +static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file_unsafe(name, mode, parent, + value, &fops_imc_x64); +} + +/* + * export_imc_mode_and_cmd: Create a debugfs interface + * for imc_cmd and imc_mode + * for each node in the system. + * imc_mode and imc_cmd can be changed by echo into + * this interface. + */ +static void export_imc_mode_and_cmd(struct device_node *node, + struct imc_pmu *pmu_ptr) +{ + static u64 loc, *imc_mode_addr, *imc_cmd_addr; + int chip = 0, nid; + char mode[16], cmd[16]; + u32 cb_offset; + + imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root); + + /* + * Return here, either because 'imc' directory already exists, + * Or failed to create a new one. 
+ */ + if (!imc_debugfs_parent) + return; + + if (of_property_read_u32(node, "cb_offset", &cb_offset)) + cb_offset = IMC_CNTL_BLK_OFFSET; + + for_each_node(nid) { + loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset; + imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET); + sprintf(mode, "imc_mode_%d", nid); + if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent, + imc_mode_addr)) + goto err; + + imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET); + sprintf(cmd, "imc_cmd_%d", nid); + if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent, + imc_cmd_addr)) + goto err; + chip++; + } + return; + +err: + debugfs_remove_recursive(imc_debugfs_parent); +} /* * imc_get_mem_addr_nest: Function to get nest counter memory region @@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, } pmu_ptr->imc_counter_mmaped = true; + export_imc_mode_and_cmd(node, pmu_ptr); kfree(base_addr_arr); kfree(chipid_arr); return 0; @@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev) } } + /* If none of the nest units are registered, remove debugfs interface */ + if (pmu_count == 0) + debugfs_remove_recursive(imc_debugfs_parent); + return 0; } diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b9146d5aef81..1069f9cb273a 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2572,7 +2572,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, unsigned long direct_table_size; if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) || - (window_size > memory_hotplug_max()) || !is_power_of_2(window_size)) return 0; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 1edfbc1e40f4..4fb21e17504a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -37,13 +37,62 @@ #include <asm/kexec.h> #include <asm/smp.h> #include <asm/tm.h> +#include <asm/setup.h> #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ + struct device_node *np, *fw_features; + enum l1d_flush_type type; + int enable; + + /* Default to fallback in case fw-features are not available */ + type = L1D_FLUSH_FALLBACK; + enable = 1; + + np = of_find_node_by_name(NULL, "ibm,opal"); + fw_features = of_get_child_by_name(np, "fw-features"); + of_node_put(np); + + if (fw_features) { + np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_MTTRIG; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); + if (np && of_property_read_bool(np, "enabled")) + type = L1D_FLUSH_ORI; + + of_node_put(np); + + /* Enable unless firmware says NOT to */ + enable = 2; + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + + np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); + if (np && of_property_read_bool(np, "disabled")) + enable--; + + of_node_put(np); + of_node_put(fw_features); + } + + setup_rfi_flush(type, enable > 0); +} + static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); + pnv_setup_rfi_flush(); + /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 9dabea6e1443..6244bc849469 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ 
b/arch/powerpc/platforms/ps3/setup.c @@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void) ps3_sys_manager_halt(); /* never returns */ } +static void ps3_panic(char *str) +{ + DBG("%s:%d %s\n", __func__, __LINE__, str); + + smp_send_stop(); + printk("\n"); + printk(" System does not reboot automatically.\n"); + printk(" Please press POWER button.\n"); + printk("\n"); + + while(1) + lv1_pause(1); +} + #if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \ defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE) static void __init prealloc(struct ps3_prealloc *p) @@ -255,6 +269,7 @@ define_machine(ps3) { .probe = ps3_probe, .setup_arch = ps3_setup_arch, .init_IRQ = ps3_init_IRQ, + .panic = ps3_panic, .get_boot_time = ps3_get_boot_time, .set_dabr = ps3_set_dabr, .calibrate_decr = ps3_calibrate_decr, diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 6e35780c5962..a0b20c03f078 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr, static CLASS_ATTR_RW(dlpar); -static int __init pseries_dlpar_init(void) +int __init dlpar_workqueue_init(void) { + if (pseries_hp_wq) + return 0; + pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", - WQ_UNBOUND, 1); + WQ_UNBOUND, 1); + + return pseries_hp_wq ? 0 : -ENOMEM; +} + +static int __init dlpar_sysfs_init(void) +{ + int rc; + + rc = dlpar_workqueue_init(); + if (rc) + return rc; + return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr); } -machine_device_initcall(pseries, pseries_dlpar_init); +machine_device_initcall(pseries, dlpar_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c index aac3ea2911b2..a3bbeb43689e 100644 --- a/arch/powerpc/platforms/pseries/firmware.c +++ b/arch/powerpc/platforms/pseries/firmware.c @@ -115,6 +115,7 @@ vec5_fw_features_table[] = { {FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY}, {FW_FEATURE_PRRN, OV5_PRRN}, {FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2}, + {FW_FEATURE_DRC_INFO, OV5_DRC_INFO}, }; static void __init fw_vec5_feature_init(const char *vec5, unsigned long len) diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c index 7e75101fa522..6df192f38f80 100644 --- a/arch/powerpc/platforms/pseries/of_helpers.c +++ b/arch/powerpc/platforms/pseries/of_helpers.c @@ -3,6 +3,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/of.h> +#include <asm/prom.h> #include "of_helpers.h" @@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char *path) kfree(parent_path); return parent ? 
parent : ERR_PTR(-EINVAL); } + + +/* Helper Routines to convert between drc_index to cpu numbers */ + +int of_read_drc_info_cell(struct property **prop, const __be32 **curval, + struct of_drc_info *data) +{ + const char *p; + const __be32 *p2; + + if (!data) + return -EINVAL; + + /* Get drc-type:encode-string */ + p = data->drc_type = (char*) (*curval); + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-name-prefix:encode-string */ + data->drc_name_prefix = (char *)p; + p = of_prop_next_string(*prop, p); + if (!p) + return -EINVAL; + + /* Get drc-index-start:encode-int */ + p2 = (const __be32 *)p; + p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start); + if (!p2) + return -EINVAL; + + /* Get drc-name-suffix-start:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start); + if (!p2) + return -EINVAL; + + /* Get number-sequential-elements:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems); + if (!p2) + return -EINVAL; + + /* Get sequential-increment:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc); + if (!p2) + return -EINVAL; + + /* Get drc-power-domain:encode-int */ + p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain); + if (!p2) + return -EINVAL; + + /* Should now know end of current entry */ + (*curval) = (void *)p2; + data->last_drc_index = data->drc_index_start + + ((data->num_sequential_elems - 1) * data->sequential_inc); + + return 0; +} +EXPORT_SYMBOL(of_read_drc_info_cell); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4470a3194311..1ae1d9f4dbe9 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void) return CMO_PageSize; } +int dlpar_workqueue_init(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index 35c891aabef0..6ed22127391b 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -22,6 +22,7 @@ #include <asm/page.h> #include <asm/hvcall.h> #include <asm/firmware.h> +#include <asm/prom.h> #define MODULE_VERS "1.0" @@ -38,26 +39,58 @@ static int sysfs_entries; static u32 cpu_to_drc_index(int cpu) { struct device_node *dn = NULL; - const int *indexes; - int i; + int thread_index; int rc = 1; u32 ret = 0; dn = of_find_node_by_path("/cpus"); if (dn == NULL) goto err; - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; + /* Convert logical cpu number to core number */ - i = cpu_core_index_of_thread(cpu); - /* - * The first element indexes[0] is the number of drc_indexes - * returned in the list. Hence i+1 will get the drc_index - * corresponding to core number i. 
- */ - WARN_ON(i > indexes[0]); - ret = indexes[i + 1]; + thread_index = cpu_core_index_of_thread(cpu); + + if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { + struct property *info = NULL; + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info == NULL) + goto err_of_node_put; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (thread_index < drc.last_drc_index) + break; + } + + ret = drc.drc_index_start + (thread_index * drc.sequential_inc); + } else { + const __be32 *indexes; + + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + + /* + * The first element indexes[0] is the number of drc_indexes + * returned in the list. Hence thread_index+1 will get the + * drc_index corresponding to core number thread_index. + */ + ret = indexes[thread_index + 1]; + } + rc = 0; err_of_node_put: @@ -72,34 +105,71 @@ static int drc_index_to_cpu(u32 drc_index) { struct device_node *dn = NULL; const int *indexes; - int i, cpu = 0; + int thread_index = 0, cpu = 0; int rc = 1; dn = of_find_node_by_path("/cpus"); if (dn == NULL) goto err; - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; - /* - * First element in the array is the number of drc_indexes - * returned. Search through the list to find the matching - * drc_index and get the core number - */ - for (i = 0; i < indexes[0]; i++) { - if (indexes[i + 1] == drc_index) + + if (firmware_has_feature(FW_FEATURE_DRC_INFO)) { + struct property *info = NULL; + struct of_drc_info drc; + int j; + u32 num_set_entries; + const __be32 *value; + + info = of_find_property(dn, "ibm,drc-info", NULL); + if (info == NULL) + goto err_of_node_put; + + value = of_prop_next_u32(info, NULL, &num_set_entries); + if (!value) + goto err_of_node_put; + + for (j = 0; j < num_set_entries; j++) { + + of_read_drc_info_cell(&info, &value, &drc); + if (strncmp(drc.drc_type, "CPU", 3)) + goto err; + + if (drc_index > drc.last_drc_index) { + cpu += drc.num_sequential_elems; + continue; + } + cpu += ((drc_index - drc.drc_index_start) / + drc.sequential_inc); + + thread_index = cpu_first_thread_of_core(cpu); + rc = 0; break; + } + } else { + unsigned long int i; + + indexes = of_get_property(dn, "ibm,drc-indexes", NULL); + if (indexes == NULL) + goto err_of_node_put; + /* + * First element in the array is the number of drc_indexes + * returned. 
Search through the list to find the matching + * drc_index and get the core number + */ + for (i = 0; i < indexes[0]; i++) { + if (indexes[i + 1] == drc_index) + break; + } + /* Convert core number to logical cpu number */ + thread_index = cpu_first_thread_of_core(i); + rc = 0; } - /* Convert core number to logical cpu number */ - cpu = cpu_first_thread_of_core(i); - rc = 0; err_of_node_put: of_node_put(dn); err: if (rc) printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index); - return cpu; + return thread_index; } /* diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 4923ffe230cf..81d8614e7379 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void) /* Hotplug Events */ np = of_find_node_by_path("/event-sources/hot-plug-events"); if (np != NULL) { - request_event_sources_irqs(np, ras_hotplug_interrupt, + if (dlpar_workqueue_init() == 0) + request_event_sources_irqs(np, ras_hotplug_interrupt, "RAS_HOTPLUG"); of_node_put(np); } diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 1d6e2de2445c..db76963e693d 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } +static void pseries_setup_rfi_flush(void) +{ + struct h_cpu_char_result result; + enum l1d_flush_type types; + bool enable; + long rc; + + /* Enable by default */ + enable = true; + + rc = plpar_get_cpu_characteristics(&result); + if (rc == H_SUCCESS) { + types = L1D_FLUSH_NONE; + + if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + types |= L1D_FLUSH_MTTRIG; + if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) + types |= L1D_FLUSH_ORI; + + /* Use fallback if nothing set in hcall */ + if (types == L1D_FLUSH_NONE) + types = L1D_FLUSH_FALLBACK; + + if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + enable = false; + } else { + /* Default to fallback if case hcall is not available */ + types = L1D_FLUSH_FALLBACK; + } + + setup_rfi_flush(types, enable); +} + static void __init pSeries_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); + pseries_setup_rfi_flush(); + /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); @@ -726,6 +761,7 @@ define_machine(pseries) { .pcibios_fixup = pSeries_final_fixup, .restart = rtas_restart, .halt = rtas_halt, + .panic = rtas_os_term, .get_boot_time = rtas_get_boot_time, .get_rtc_time = rtas_get_rtc_time, .set_rtc_time = rtas_set_rtc_time, diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c index 1707bf04dec6..94278e8af192 100644 --- a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c +++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c @@ -56,17 +56,16 @@ static ssize_t fsl_timer_wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct timeval interval; - int val = 0; + time64_t interval = 0; mutex_lock(&sysfs_lock); if (fsl_wakeup->timer) { mpic_get_remain_time(fsl_wakeup->timer, &interval); - val = interval.tv_sec + 1; + interval++; } mutex_unlock(&sysfs_lock); - return sprintf(buf, "%d\n", val); + return sprintf(buf, "%lld\n", interval); } static ssize_t fsl_timer_wakeup_store(struct device *dev, @@ -74,11 +73,10 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev, 
const char *buf, size_t count) { - struct timeval interval; + time64_t interval; int ret; - interval.tv_usec = 0; - if (kstrtol(buf, 0, &interval.tv_sec)) + if (kstrtoll(buf, 0, &interval)) return -EINVAL; mutex_lock(&sysfs_lock); @@ -89,13 +87,13 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev, fsl_wakeup->timer = NULL; } - if (!interval.tv_sec) { + if (!interval) { mutex_unlock(&sysfs_lock); return count; } fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq, - fsl_wakeup, &interval); + fsl_wakeup, interval); if (!fsl_wakeup->timer) { mutex_unlock(&sysfs_lock); return -EINVAL; diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c index a418579591be..87e7c42777a8 100644 --- a/arch/powerpc/sysdev/mpic_timer.c +++ b/arch/powerpc/sysdev/mpic_timer.c @@ -47,9 +47,6 @@ #define MAX_TICKS_CASCADE (~0U) #define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num)) -/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */ -#define ONE_SECOND 1000000 - struct timer_regs { u32 gtccr; u32 res0[3]; @@ -90,51 +87,23 @@ static struct cascade_priv cascade_timer[] = { static LIST_HEAD(timer_group_list); static void convert_ticks_to_time(struct timer_group_priv *priv, - const u64 ticks, struct timeval *time) + const u64 ticks, time64_t *time) { - u64 tmp_sec; - - time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq); - tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; - - time->tv_usec = 0; - - if (tmp_sec <= ticks) - time->tv_usec = (__kernel_suseconds_t) - div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq); - - return; + *time = (u64)div_u64(ticks, priv->timerfreq); } /* the time set by the user is converted to "ticks" */ static int convert_time_to_ticks(struct timer_group_priv *priv, - const struct timeval *time, u64 *ticks) + time64_t time, u64 *ticks) { u64 max_value; /* prevent u64 overflow */ - u64 tmp = 0; - - u64 tmp_sec; - u64 tmp_ms; - u64 tmp_us; max_value = div_u64(ULLONG_MAX, priv->timerfreq); - if (time->tv_sec > max_value || - (time->tv_sec == max_value && time->tv_usec > 0)) + if (time > max_value) return -EINVAL; - tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq; - tmp += tmp_sec; - - tmp_ms = time->tv_usec / 1000; - tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000); - tmp += tmp_ms; - - tmp_us = time->tv_usec % 1000; - tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000); - tmp += tmp_us; - - *ticks = tmp; + *ticks = (u64)time * (u64)priv->timerfreq; return 0; } @@ -223,7 +192,7 @@ static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv, return allocated_timer; } -static struct mpic_timer *get_timer(const struct timeval *time) +static struct mpic_timer *get_timer(time64_t time) { struct timer_group_priv *priv; struct mpic_timer *timer; @@ -277,7 +246,7 @@ static struct mpic_timer *get_timer(const struct timeval *time) * @handle: the timer to be started. * * It will do ->fn(->dev) callback from the hardware interrupt at - * the ->timeval point in the future. + * the 'time64_t' point in the future. */ void mpic_start_timer(struct mpic_timer *handle) { @@ -319,7 +288,7 @@ EXPORT_SYMBOL(mpic_stop_timer); * * Query timer remaining time. */ -void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) +void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { struct timer_group_priv *priv = container_of(handle, struct timer_group_priv, timer[handle->num]); @@ -391,7 +360,7 @@ EXPORT_SYMBOL(mpic_free_timer); * else "handle" on success. 
*/ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, - const struct timeval *time) + time64_t time) { struct mpic_timer *allocated_timer; int ret; @@ -399,11 +368,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev, if (list_empty(&timer_group_list)) return NULL; - if (!(time->tv_sec + time->tv_usec) || - time->tv_sec < 0 || time->tv_usec < 0) - return NULL; - - if (time->tv_usec > ONE_SECOND) + if (time < 0) return NULL; allocated_timer = get_timer(time); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 838ebdbfe4c5..40c06110821c 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d) * EOI the source if it hasn't been disabled and hasn't * been passed-through to a KVM guest */ - if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) + if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && + !(xd->flags & XIVE_IRQ_NO_EOI)) xive_do_source_eoi(irqd_to_hwirq(d), xd); /* diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 1b2d8cb49abb..01d9a2dcff20 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs) printf("kernel BUG at %s:%u!\n", bug->file, bug->line); #else - printf("kernel BUG at %p!\n", (void *)bug->bug_addr); + printf("kernel BUG at %px!\n", (void *)bug->bug_addr); #endif #endif /* CONFIG_BUG */ } @@ -1623,7 +1623,7 @@ static void excprint(struct pt_regs *fp) printf(" current = 0x%lx\n", current); #ifdef CONFIG_PPC64 printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n", - local_paca, local_paca->soft_enabled, local_paca->irq_happened); + local_paca, local_paca->irq_soft_mask, local_paca->irq_happened); #endif if (current) { printf(" pid = %ld, comm = %s\n", @@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu) p = &paca[cpu]; - printf("paca for cpu 0x%x @ %p:\n", cpu, p); + printf("paca for cpu 0x%x @ %px:\n", cpu, p); printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no"); printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? 
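With the struct timeval parameter gone, a caller hands mpic_request_timer() a plain number of seconds and drives the timer with the start/stop/get-remain/free calls visible in this diff. A kernel-style sketch of that sequence; this is not a complete driver, and my_dev plus the surrounding probe code are assumed.

/* Kernel-style sketch only (not a standalone program). */
#include <linux/interrupt.h>
#include <linux/time64.h>
#include <asm/mpic_timer.h>

static struct mpic_timer *t;

static irqreturn_t my_handler(int irq, void *dev)
{
	pr_info("mpic wakeup timer fired\n");
	return IRQ_HANDLED;
}

static int arm_wakeup(void *my_dev, time64_t seconds)
{
	t = mpic_request_timer(my_handler, my_dev, seconds);
	if (!t)
		return -EINVAL;

	mpic_start_timer(t);	/* my_handler(irq, my_dev) runs when it expires */
	return 0;
}

static void query_and_cancel(void)
{
	time64_t remain;

	mpic_get_remain_time(t, &remain);	/* whole seconds remaining */
	mpic_stop_timer(t);
	mpic_free_timer(t);
}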
"yes" : "no"); @@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_toc, "lx"); DUMP(p, kernelbase, "lx"); DUMP(p, kernel_msr, "lx"); - DUMP(p, emergency_sp, "p"); + DUMP(p, emergency_sp, "px"); #ifdef CONFIG_PPC_BOOK3S_64 - DUMP(p, nmi_emergency_sp, "p"); - DUMP(p, mc_emergency_sp, "p"); + DUMP(p, nmi_emergency_sp, "px"); + DUMP(p, mc_emergency_sp, "px"); DUMP(p, in_nmi, "x"); DUMP(p, in_mce, "x"); DUMP(p, hmi_event_available, "x"); @@ -2375,23 +2375,27 @@ static void dump_one_paca(int cpu) DUMP(p, slb_cache_ptr, "x"); for (i = 0; i < SLB_CACHE_ENTRIES; i++) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); + + DUMP(p, rfi_flush_fallback_area, "px"); + DUMP(p, l1d_flush_congruence, "llx"); + DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E - DUMP(p, pgd, "p"); - DUMP(p, kernel_pgd, "p"); - DUMP(p, tcd_ptr, "p"); - DUMP(p, mc_kstack, "p"); - DUMP(p, crit_kstack, "p"); - DUMP(p, dbg_kstack, "p"); + DUMP(p, pgd, "px"); + DUMP(p, kernel_pgd, "px"); + DUMP(p, tcd_ptr, "px"); + DUMP(p, mc_kstack, "px"); + DUMP(p, crit_kstack, "px"); + DUMP(p, dbg_kstack, "px"); #endif - DUMP(p, __current, "p"); + DUMP(p, __current, "px"); DUMP(p, kstack, "lx"); printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1)); DUMP(p, stab_rr, "lx"); DUMP(p, saved_r1, "lx"); DUMP(p, trap_save, "x"); - DUMP(p, soft_enabled, "x"); + DUMP(p, irq_soft_mask, "x"); DUMP(p, irq_happened, "x"); DUMP(p, io_sync, "x"); DUMP(p, irq_work_pending, "x"); @@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu) #endif #ifdef CONFIG_PPC_POWERNV - DUMP(p, core_idle_state_ptr, "p"); + DUMP(p, core_idle_state_ptr, "px"); DUMP(p, thread_idle_state, "x"); DUMP(p, thread_mask, "x"); DUMP(p, subcore_sibling_mask, "x"); @@ -2945,7 +2949,7 @@ static void show_task(struct task_struct *tsk) (tsk->exit_state & EXIT_DEAD) ? 'E' : (tsk->state & TASK_INTERRUPTIBLE) ? 
'S' : '?'; - printf("%p %016lx %6d %6d %c %2d %s\n", tsk, + printf("%px %016lx %6d %6d %c %2d %s\n", tsk, tsk->thread.ksp, tsk->pid, tsk->parent->pid, state, task_thread_info(tsk)->cpu, @@ -2988,7 +2992,7 @@ static void show_pte(unsigned long addr) if (setjmp(bus_error_jmp) != 0) { catch_memory_errors = 0; - printf("*** Error dumping pte for task %p\n", tsk); + printf("*** Error dumping pte for task %px\n", tsk); return; } @@ -3074,7 +3078,7 @@ static void show_tasks(void) if (setjmp(bus_error_jmp) != 0) { catch_memory_errors = 0; - printf("*** Error dumping task %p\n", tsk); + printf("*** Error dumping task %px\n", tsk); return; } diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 12a41b2753f0..7ff315ad3692 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master) ctx->pid = NULL; /* Set in start work ioctl */ mutex_init(&ctx->mapping_lock); ctx->mapping = NULL; + ctx->tidr = 0; + ctx->assign_tidr = false; if (cxl_is_power8()) { spin_lock_init(&ctx->sste_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index e46a4062904a..53149fbd780e 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -630,6 +630,9 @@ struct cxl_context { struct list_head extra_irq_contexts; struct mm_struct *mm; + + u16 tidr; + bool assign_tidr; }; struct cxl_irq_info; diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c index dc9bc1807fdf..30ccba436b3b 100644 --- a/drivers/misc/cxl/cxllib.c +++ b/drivers/misc/cxl/cxllib.c @@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task, */ attr->pid = mm->context.id; mmput(mm); + attr->tid = task->thread.tidr; } else { attr->pid = 0; + attr->tid = 0; } - attr->tid = 0; return 0; } EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 76c0b0ca9388..93fd381f6484 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, * flags are set it's invalid */ if (work.reserved1 || work.reserved2 || work.reserved3 || - work.reserved4 || work.reserved5 || work.reserved6 || + work.reserved4 || work.reserved5 || (work.flags & ~CXL_START_WORK_ALL)) { rc = -EINVAL; goto out; @@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, rc = -EINVAL; goto out; } + if ((rc = afu_register_irqs(ctx, work.num_interrupts))) goto out; if (work.flags & CXL_START_WORK_AMR) amr = work.amr & mfspr(SPRN_UAMOR); + if (work.flags & CXL_START_WORK_TID) + ctx->assign_tidr = true; + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); /* @@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, goto out; } - ctx->status = STARTED; rc = 0; + if (work.flags & CXL_START_WORK_TID) { + work.tid = ctx->tidr; + if (copy_to_user(uwork, &work, sizeof(work))) + rc = -EFAULT; + } + + ctx->status = STARTED; + out: mutex_unlock(&ctx->status_mutex); return rc; diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 02b6b45b4c20..1b3d7c65ea3f 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -16,6 +16,7 @@ #include <linux/uaccess.h> #include <linux/delay.h> #include <asm/synch.h> +#include <asm/switch_to.h> #include <misc/cxl-base.h> #include "cxl.h" @@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx) static int process_element_entry_psl9(struct cxl_context *ctx, u64 
wed, u64 amr) { u32 pid; + int rc; cxl_assign_psn_space(ctx); @@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr) pid = ctx->mm->context.id; } - ctx->elem->common.tid = 0; + /* Assign a unique TIDR (thread id) for the current thread */ + if (!(ctx->tidr) && (ctx->assign_tidr)) { + rc = set_thread_tidr(current); + if (rc) + return -ENODEV; + ctx->tidr = current->thread.tidr; + pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr); + } + + ctx->elem->common.tid = cpu_to_be32(ctx->tidr); ctx->elem->common.pid = cpu_to_be32(pid); ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c index a3449d717a99..fc01d7d807f3 100644 --- a/drivers/pci/hotplug/rpadlpar_core.c +++ b/drivers/pci/hotplug/rpadlpar_core.c @@ -27,6 +27,7 @@ #include <linux/mutex.h> #include <asm/rtas.h> #include <asm/vio.h> +#include <linux/firmware.h> #include "../pci.h" #include "rpaphp.h" @@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char *drc_name) { struct device_node *parent = of_find_node_by_name(NULL, "vdevice"); struct device_node *dn = NULL; - char *name; int rc; if (!parent) return NULL; while ((dn = of_get_next_child(parent, dn))) { - rc = rpaphp_get_drc_props(dn, NULL, &name, NULL, NULL); - if ((rc == 0) && (!strcmp(drc_name, name))) + rc = rpaphp_check_drc_props(dn, drc_name, NULL); + if (rc == 0) break; } @@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char *drc_name, char *drc_type) { struct device_node *np = NULL; - char *name; - char *type; int rc; while ((np = of_find_node_by_name(np, "pci"))) { - rc = rpaphp_get_drc_props(np, NULL, &name, &type, NULL); + rc = rpaphp_check_drc_props(np, drc_name, drc_type); if (rc == 0) - if (!strcmp(drc_name, name) && !strcmp(drc_type, type)) - break; + break; } return np; diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index edb5d8a53020..b806314349cf 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <linux/pci.h> #include <linux/pci_hotplug.h> +#include "rpaphp.h" #include "rpadlpar.h" #include "../pci.h" @@ -27,8 +28,6 @@ #define ADD_SLOT_ATTR_NAME add_slot #define REMOVE_SLOT_ATTR_NAME remove_slot -#define MAX_DRC_NAME_LEN 64 - static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t nbytes) { diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h index 7db024e68fe6..bdb844b01a3d 100644 --- a/drivers/pci/hotplug/rpaphp.h +++ b/drivers/pci/hotplug/rpaphp.h @@ -64,6 +64,10 @@ extern bool rpaphp_debug; #define CONFIGURED 1 #define EMPTY 0 +/* DRC constants */ + +#define MAX_DRC_NAME_LEN 64 + /* * struct slot - slot information for each *physical* slot */ @@ -91,8 +95,8 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state); /* rpaphp_core.c */ int rpaphp_add_slot(struct device_node *dn); -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index, - char **drc_name, char **drc_type, int *drc_power_domain); +int rpaphp_check_drc_props(struct device_node *dn, char *drc_name, + char *drc_type); /* rpaphp_slot.c */ void dealloc_slot_struct(struct slot *slot); diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 1e29abaaea08..53902c7c38f2 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -30,6 +30,7 @@ #include 
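The rpadlpar helpers above now delegate matching to rpaphp_check_drc_props(). In the v1 path added in the rpaphp_core.c hunk that follows, the parent's ibm,drc-names and ibm,drc-types properties are walked as a count followed by NUL-terminated strings packed back to back, advancing by strlen()+1 per entry. A standalone sketch of that walk over a made-up buffer:

#include <stdio.h>
#include <string.h>

/* Walk a packed list of NUL-terminated strings, the way the v1 matcher
 * advances name_tmp/type_tmp through ibm,drc-names / ibm,drc-types. */
static const char *nth_packed_string(const char *buf, unsigned int count,
				     unsigned int want)
{
	const char *p = buf;
	unsigned int i;

	for (i = 0; i < count; i++) {
		if (i == want)
			return p;
		p += strlen(p) + 1;	/* skip the string and its NUL */
	}
	return NULL;
}

int main(void)
{
	/* Made-up sample; in the real property a 32-bit entry count precedes
	 * the strings, which is why the kernel starts at &names[1]. */
	static const char names[] = "PHB 1\0PHB 2\0Slot 7";

	printf("%s\n", nth_packed_string(names, 3, 2));	/* -> "Slot 7" */
	return 0;
}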
<linux/smp.h> #include <linux/init.h> #include <linux/vmalloc.h> +#include <asm/firmware.h> #include <asm/eeh.h> /* for eeh_add_device() */ #include <asm/rtas.h> /* rtas_call */ #include <asm/pci-bridge.h> /* for pci_controller */ @@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, const int **drc_indexes, return 0; } -/* To get the DRC props describing the current node, first obtain it's - * my-drc-index property. Next obtain the DRC list from it's parent. Use - * the my-drc-index for correlation, and obtain the requested properties. + +/* Verify the existence of 'drc_name' and/or 'drc_type' within the + * current node. First obtain it's my-drc-index property. Next, + * obtain the DRC info from it's parent. Use the my-drc-index for + * correlation, and obtain/validate the requested properties. */ -int rpaphp_get_drc_props(struct device_node *dn, int *drc_index, - char **drc_name, char **drc_type, int *drc_power_domain) + +static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name, + char *drc_type, unsigned int my_index) { + char *name_tmp, *type_tmp; const int *indexes, *names; const int *types, *domains; - const unsigned int *my_index; - char *name_tmp, *type_tmp; int i, rc; - my_index = of_get_property(dn, "ibm,my-drc-index", NULL); - if (!my_index) { - /* Node isn't DLPAR/hotplug capable */ - return -EINVAL; - } - rc = get_children_props(dn->parent, &indexes, &names, &types, &domains); if (rc < 0) { return -EINVAL; @@ -225,24 +222,84 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index, /* Iterate through parent properties, looking for my-drc-index */ for (i = 0; i < be32_to_cpu(indexes[0]); i++) { - if ((unsigned int) indexes[i + 1] == *my_index) { - if (drc_name) - *drc_name = name_tmp; - if (drc_type) - *drc_type = type_tmp; - if (drc_index) - *drc_index = be32_to_cpu(*my_index); - if (drc_power_domain) - *drc_power_domain = be32_to_cpu(domains[i+1]); - return 0; - } + if ((unsigned int) indexes[i + 1] == my_index) + break; + name_tmp += (strlen(name_tmp) + 1); type_tmp += (strlen(type_tmp) + 1); } + if (((drc_name == NULL) || (drc_name && !strcmp(drc_name, name_tmp))) && + ((drc_type == NULL) || (drc_type && !strcmp(drc_type, type_tmp)))) + return 0; + return -EINVAL; } -EXPORT_SYMBOL_GPL(rpaphp_get_drc_props); + +static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name, + char *drc_type, unsigned int my_index) +{ + struct property *info; + unsigned int entries; + struct of_drc_info drc; + const __be32 *value; + char cell_drc_name[MAX_DRC_NAME_LEN]; + int j, fndit; + + info = of_find_property(dn->parent, "ibm,drc-info", NULL); + if (info == NULL) + return -EINVAL; + + value = of_prop_next_u32(info, NULL, &entries); + if (!value) + return -EINVAL; + + for (j = 0; j < entries; j++) { + of_read_drc_info_cell(&info, &value, &drc); + + /* Should now know end of current entry */ + + if (my_index > drc.last_drc_index) + continue; + + fndit = 1; + break; + } + /* Found it */ + + if (fndit) + sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix, + my_index); + + if (((drc_name == NULL) || + (drc_name && !strcmp(drc_name, cell_drc_name))) && + ((drc_type == NULL) || + (drc_type && !strcmp(drc_type, drc.drc_type)))) + return 0; + + return -EINVAL; +} + +int rpaphp_check_drc_props(struct device_node *dn, char *drc_name, + char *drc_type) +{ + const unsigned int *my_index; + + my_index = of_get_property(dn, "ibm,my-drc-index", NULL); + if (!my_index) { + /* Node isn't DLPAR/hotplug capable */ + return -EINVAL; + } + + if 
(firmware_has_feature(FW_FEATURE_DRC_INFO)) + return rpaphp_check_drc_props_v2(dn, drc_name, drc_type, + *my_index); + else + return rpaphp_check_drc_props_v1(dn, drc_name, drc_type, + *my_index); +} +EXPORT_SYMBOL_GPL(rpaphp_check_drc_props); + static int is_php_type(char *drc_type) { diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index bb6836986200..3bf73fb58045 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -396,6 +396,7 @@ typedef struct elf64_shdr { #define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */ #define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */ #define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */ +#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 49e8fd08855a..56376d3907d8 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -20,20 +20,22 @@ struct cxl_ioctl_start_work { __u64 work_element_descriptor; __u64 amr; __s16 num_interrupts; - __s16 reserved1; - __s32 reserved2; + __u16 tid; + __s32 reserved1; + __u64 reserved2; __u64 reserved3; __u64 reserved4; __u64 reserved5; - __u64 reserved6; }; #define CXL_START_WORK_AMR 0x0000000000000001ULL #define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL #define CXL_START_WORK_ERR_FF 0x0000000000000004ULL +#define CXL_START_WORK_TID 0x0000000000000008ULL #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ CXL_START_WORK_NUM_IRQS |\ - CXL_START_WORK_ERR_FF) + CXL_START_WORK_ERR_FF |\ + CXL_START_WORK_TID) /* Possible modes that an afu can be in */ |
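With reserved1 repurposed as tid and CXL_START_WORK_TID added to CXL_START_WORK_ALL, userspace can ask the driver to assign a thread id and read it back from the same structure once the start-work ioctl returns. A hedged userspace sketch follows; the device path, the bare-bones setup and the skipped work_element_descriptor are illustrative only, while CXL_IOCTL_START_WORK and the flag names come from the same uapi header.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <misc/cxl.h>

int main(void)
{
	struct cxl_ioctl_start_work work;
	int fd = open("/dev/cxl/afu0.0d", O_RDWR);	/* illustrative path */

	if (fd < 0)
		return 1;

	memset(&work, 0, sizeof(work));
	work.flags = CXL_START_WORK_NUM_IRQS | CXL_START_WORK_TID;
	work.num_interrupts = 1;

	if (ioctl(fd, CXL_IOCTL_START_WORK, &work) == 0)
		printf("driver-assigned TIDR: %u\n", work.tid);

	close(fd);
	return 0;
}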