aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/Kconfig20
-rw-r--r--arch/powerpc/Kconfig.debug4
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h5
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h10
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h37
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h4
-rw-r--r--arch/powerpc/include/asm/bug.h1
-rw-r--r--arch/powerpc/include/asm/code-patching.h1
-rw-r--r--arch/powerpc/include/asm/cputable.h9
-rw-r--r--arch/powerpc/include/asm/exception-64e.h6
-rw-r--r--arch/powerpc/include/asm/exception-64s.h160
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h13
-rw-r--r--arch/powerpc/include/asm/firmware.h4
-rw-r--r--arch/powerpc/include/asm/head-64.h40
-rw-r--r--arch/powerpc/include/asm/hmi.h4
-rw-r--r--arch/powerpc/include/asm/hvcall.h18
-rw-r--r--arch/powerpc/include/asm/hw_irq.h161
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h9
-rw-r--r--arch/powerpc/include/asm/irqflags.h14
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/include/asm/local.h200
-rw-r--r--arch/powerpc/include/asm/machdep.h1
-rw-r--r--arch/powerpc/include/asm/mman.h13
-rw-r--r--arch/powerpc/include/asm/mmu.h9
-rw-r--r--arch/powerpc/include/asm/mmu_context.h22
-rw-r--r--arch/powerpc/include/asm/mpic_timer.h8
-rw-r--r--arch/powerpc/include/asm/paca.h12
-rw-r--r--arch/powerpc/include/asm/pkeys.h218
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h14
-rw-r--r--arch/powerpc/include/asm/processor.h5
-rw-r--r--arch/powerpc/include/asm/prom.h16
-rw-r--r--arch/powerpc/include/asm/reg.h6
-rw-r--r--arch/powerpc/include/asm/setup.h14
-rw-r--r--arch/powerpc/include/asm/systbl.h3
-rw-r--r--arch/powerpc/include/asm/unistd.h6
-rw-r--r--arch/powerpc/include/asm/xive-regs.h35
-rw-r--r--arch/powerpc/include/asm/xive.h38
-rw-r--r--arch/powerpc/include/uapi/asm/elf.h1
-rw-r--r--arch/powerpc/include/uapi/asm/mman.h6
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h3
-rw-r--r--arch/powerpc/kernel/asm-offsets.c7
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S2
-rw-r--r--arch/powerpc/kernel/entry_64.S90
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S20
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S177
-rw-r--r--arch/powerpc/kernel/fadump.c22
-rw-r--r--arch/powerpc/kernel/head_64.S11
-rw-r--r--arch/powerpc/kernel/idle_book3e.S5
-rw-r--r--arch/powerpc/kernel/idle_power4.S5
-rw-r--r--arch/powerpc/kernel/irq.c29
-rw-r--r--arch/powerpc/kernel/mce.c142
-rw-r--r--arch/powerpc/kernel/optprobes_head.S2
-rw-r--r--arch/powerpc/kernel/process.c13
-rw-r--r--arch/powerpc/kernel/prom_init.c1
-rw-r--r--arch/powerpc/kernel/ptrace.c78
-rw-r--r--arch/powerpc/kernel/setup-common.c37
-rw-r--r--arch/powerpc/kernel/setup_64.c144
-rw-r--r--arch/powerpc/kernel/signal_32.c8
-rw-r--r--arch/powerpc/kernel/signal_64.c3
-rw-r--r--arch/powerpc/kernel/time.c6
-rw-r--r--arch/powerpc/kernel/traps.c19
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S9
-rw-r--r--arch/powerpc/kernel/watchdog.c58
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c8
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c9
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S20
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S7
-rw-r--r--arch/powerpc/kvm/book3s_segment.S4
-rw-r--r--arch/powerpc/kvm/book3s_xive.c7
-rw-r--r--arch/powerpc/lib/code-patching.c32
-rw-r--r--arch/powerpc/lib/feature-fixups.c49
-rw-r--r--arch/powerpc/mm/Makefile1
-rw-r--r--arch/powerpc/mm/fault.c56
-rw-r--r--arch/powerpc/mm/hash_utils_64.c26
-rw-r--r--arch/powerpc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c2
-rw-r--r--arch/powerpc/mm/pkeys.c468
-rw-r--r--arch/powerpc/mm/subpage-prot.c3
-rw-r--r--arch/powerpc/perf/core-book3s.c14
-rw-r--r--arch/powerpc/perf/imc-pmu.c119
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c6
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h2
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c77
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c1
-rw-r--r--arch/powerpc/platforms/powernv/setup.c49
-rw-r--r--arch/powerpc/platforms/ps3/setup.c15
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c21
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c1
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.c60
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h2
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c126
-rw-r--r--arch/powerpc/platforms/pseries/ras.c3
-rw-r--r--arch/powerpc/platforms/pseries/setup.c36
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c16
-rw-r--r--arch/powerpc/sysdev/mpic_timer.c55
-rw-r--r--arch/powerpc/sysdev/xive/common.c3
-rw-r--r--arch/powerpc/xmon/xmon.c40
-rw-r--r--drivers/misc/cxl/context.c2
-rw-r--r--drivers/misc/cxl/cxl.h3
-rw-r--r--drivers/misc/cxl/cxllib.c3
-rw-r--r--drivers/misc/cxl/file.c15
-rw-r--r--drivers/misc/cxl/native.c13
-rw-r--r--drivers/pci/hotplug/rpadlpar_core.c13
-rw-r--r--drivers/pci/hotplug/rpadlpar_sysfs.c3
-rw-r--r--drivers/pci/hotplug/rpaphp.h8
-rw-r--r--drivers/pci/hotplug/rpaphp_core.c107
-rw-r--r--include/uapi/linux/elf.h1
-rw-r--r--include/uapi/misc/cxl.h10
108 files changed, 2864 insertions, 694 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c51e6ce42e7a..fddc4478d4d8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -142,6 +142,7 @@ config PPC
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select ARCH_HAS_SG_CHAIN
+ select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
@@ -149,6 +150,7 @@ config PPC
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
select ARCH_USE_BUILTIN_BSWAP
@@ -166,6 +168,7 @@ config PPC
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64
select GENERIC_IRQ_SHOW
select GENERIC_IRQ_SHOW_LEVEL
select GENERIC_SMP_IDLE_THREAD
@@ -179,8 +182,6 @@ config PPC
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
- select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
- select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_CBPF_JIT if !PPC64
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_DEBUG_KMEMLEAK
@@ -867,6 +868,21 @@ config SECCOMP
If unsure, say Y. Only embedded should say N here.
+config PPC_MEM_KEYS
+ prompt "PowerPC Memory Protection Keys"
+ def_bool y
+ depends on PPC_BOOK3S_64
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_PKEYS
+ help
+ Memory Protection Keys provides a mechanism for enforcing
+ page-based protections, but without requiring modification of the
+ page tables when an application changes protection domains.
+
+ For details, see Documentation/vm/protection-keys.txt
+
+ If unsure, say y.
+
endmenu
config ISA_DMA_API
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 5d18169fff58..c45424c64e19 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -90,6 +90,10 @@ config MSI_BITMAP_SELFTEST
depends on DEBUG_KERNEL
default n
+config PPC_IRQ_SOFT_MASK_DEBUG
+ bool "Include extra checks for powerpc irq soft masking"
+ default n
+
config XMON
bool "Include xmon kernel debugger"
depends on DEBUG_KERNEL
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e91e115a816f..50ed64fba4ae 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -90,6 +90,8 @@
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
+#define HPTE_R_KEY_BIT0 ASM_CONST(0x2000000000000000)
+#define HPTE_R_KEY_BIT1 ASM_CONST(0x1000000000000000)
#define HPTE_R_RPN_SHIFT 12
#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
@@ -104,6 +106,9 @@
#define HPTE_R_C ASM_CONST(0x0000000000000080)
#define HPTE_R_R ASM_CONST(0x0000000000000100)
#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800)
+#define HPTE_R_KEY_BIT3 ASM_CONST(0x0000000000000400)
+#define HPTE_R_KEY_BIT4 ASM_CONST(0x0000000000000200)
#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index c9448e19847a..0abeb0e2d616 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -108,6 +108,16 @@ typedef struct {
#ifdef CONFIG_SPAPR_TCE_IOMMU
struct list_head iommu_group_mem_list;
#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * Each bit represents one protection key.
+ * bit set -> key allocated
+ * bit unset -> key available for allocation
+ */
+ u32 pkey_allocation_map;
+ s16 execute_only_pkey; /* key holding execute-only protection */
+#endif
} mm_context_t;
/*
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 60a0d7fd82bc..1c495068bcfa 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -40,6 +40,7 @@
#define _RPAGE_RSV2 0x0800000000000000UL
#define _RPAGE_RSV3 0x0400000000000000UL
#define _RPAGE_RSV4 0x0200000000000000UL
+#define _RPAGE_RSV5 0x00040UL
#define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */
#define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */
@@ -59,6 +60,25 @@
/* Max physical address bit as per radix table */
#define _RPAGE_PA_MAX 57
+#ifdef CONFIG_PPC_MEM_KEYS
+#ifdef CONFIG_PPC_64K_PAGES
+#define H_PTE_PKEY_BIT0 _RPAGE_RSV1
+#define H_PTE_PKEY_BIT1 _RPAGE_RSV2
+#else /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT0 0 /* _RPAGE_RSV1 is not available */
+#define H_PTE_PKEY_BIT1 0 /* _RPAGE_RSV2 is not available */
+#endif /* CONFIG_PPC_64K_PAGES */
+#define H_PTE_PKEY_BIT2 _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3 _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4 _RPAGE_RSV5
+#else /* CONFIG_PPC_MEM_KEYS */
+#define H_PTE_PKEY_BIT0 0
+#define H_PTE_PKEY_BIT1 0
+#define H_PTE_PKEY_BIT2 0
+#define H_PTE_PKEY_BIT3 0
+#define H_PTE_PKEY_BIT4 0
+#endif /* CONFIG_PPC_MEM_KEYS */
+
/*
* Max physical address bit we will use for now.
*
@@ -122,13 +142,16 @@
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
_PAGE_SOFT_DIRTY)
+
+#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
+ H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
/*
* Mask of bits returned by pte_pgprot()
*/
#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
_PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
- _PAGE_SOFT_DIRTY)
+ _PAGE_SOFT_DIRTY | H_PTE_PKEY)
/*
* We define 2 sets of base prot bits, one for basic pages (ie,
* cacheable kernel and user pages) and one for non cacheable
@@ -548,6 +571,15 @@ static inline int pte_present(pte_t pte)
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
}
+#ifdef CONFIG_PPC_MEM_KEYS
+extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
+#else
+static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+ return true;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#define pte_access_permitted pte_access_permitted
static inline bool pte_access_permitted(pte_t pte, bool write)
{
@@ -568,7 +600,8 @@ static inline bool pte_access_permitted(pte_t pte, bool write)
if ((pteval & clear_pte_bits) == clear_pte_bits)
return false;
- return true;
+
+ return arch_pte_access_permitted(pte_val(pte), write, 0);
}
/*
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 9befb4df235c..0cac17253513 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -14,6 +14,7 @@ enum {
TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
};
+#ifdef CONFIG_PPC_NATIVE
static inline void tlbiel_all(void)
{
/*
@@ -29,6 +30,9 @@ static inline void tlbiel_all(void)
else
hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
}
+#else
+static inline void tlbiel_all(void) { BUG(); };
+#endif
static inline void tlbiel_all_lpid(bool radix)
{
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 3c04249bcf39..97c38472b924 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -133,6 +133,7 @@ struct pt_regs;
extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
extern void bad_page_fault(struct pt_regs *, unsigned long, int);
extern void _exception(int, struct pt_regs *, int, unsigned long);
+extern void _exception_pkey(int, struct pt_regs *, int, unsigned long, int);
extern void die(const char *, struct pt_regs *, long);
extern bool die_will_crash(void);
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 2c895e8d07f7..812535f40124 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -31,6 +31,7 @@ unsigned int create_cond_branch(const unsigned int *addr,
unsigned long target, int flags);
int patch_branch(unsigned int *addr, unsigned long target, int flags);
int patch_instruction(unsigned int *addr, unsigned int instr);
+int raw_patch_instruction(unsigned int *addr, unsigned int instr);
int instr_is_relative_branch(unsigned int instr);
int instr_is_relative_link_branch(unsigned int instr);
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 78ca2a721d04..a2c5c95882cf 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -195,7 +195,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-/* Free LONG_ASM_CONST(0x0020000000000000) */
+#define CPU_FTR_PKEY LONG_ASM_CONST(0x0020000000000000)
#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
@@ -442,7 +442,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | \
- CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
+ CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -452,7 +452,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
@@ -464,7 +464,8 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
+ CPU_FTR_PKEY)
#define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
(~CPU_FTR_SAO))
#define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a703452d67b6..555e22d5e07f 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -209,5 +209,11 @@ exc_##label##_book3e:
ori r3,r3,vector_offset@l; \
mtspr SPRN_IVOR##vector_number,r3;
+#define RFI_TO_KERNEL \
+ rfi
+
+#define RFI_TO_USER \
+ rfi
+
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index b27205297e1d..176dfb73d42c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,59 @@
*/
#define EX_R3 EX_DAR
+/*
+ * Macros for annotating the expected destination of (h)rfid
+ *
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ */
+#define RFI_FLUSH_SLOT \
+ RFI_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop
+
+#define RFI_TO_KERNEL \
+ rfid
+
+#define RFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define HRFI_TO_KERNEL \
+ hrfid
+
+#define HRFI_TO_USER \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_GUEST \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
#ifdef CONFIG_RELOCATABLE
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
@@ -198,18 +251,40 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,area+EX_R10(r13); /* save r10 - r12 */ \
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-#define __EXCEPTION_PROLOG_1(area, extra, vec) \
+#define __EXCEPTION_PROLOG_1_PRE(area) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
SAVE_CTR(r10, area); \
- mfcr r9; \
- extra(vec); \
+ mfcr r9;
+
+#define __EXCEPTION_PROLOG_1_POST(area) \
std r11,area+EX_R11(r13); \
std r12,area+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,area+EX_R13(r13)
+
+/*
+ * This version of the EXCEPTION_PROLOG_1 will carry
+ * addition parameter called "bitmask" to support
+ * checking of the interrupt maskable level in the SOFTEN_TEST.
+ * Intended to be used in MASKABLE_EXCPETION_* macros.
+ */
+#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \
+ __EXCEPTION_PROLOG_1_PRE(area); \
+ extra(vec, bitmask); \
+ __EXCEPTION_PROLOG_1_POST(area);
+
+/*
+ * This version of the EXCEPTION_PROLOG_1 is intended
+ * to be used in STD_EXCEPTION* macros
+ */
+#define _EXCEPTION_PROLOG_1(area, extra, vec) \
+ __EXCEPTION_PROLOG_1_PRE(area); \
+ extra(vec); \
+ __EXCEPTION_PROLOG_1_POST(area);
+
#define EXCEPTION_PROLOG_1(area, extra, vec) \
- __EXCEPTION_PROLOG_1(area, extra, vec)
+ _EXCEPTION_PROLOG_1(area, extra, vec)
#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \
ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
@@ -218,7 +293,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtspr SPRN_##h##SRR0,r12; \
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
mtspr SPRN_##h##SRR1,r10; \
- h##rfid; \
+ h##RFI_TO_KERNEL; \
b . /* prevent speculative execution */
#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
__EXCEPTION_PROLOG_PSERIES_1(label, h)
@@ -232,7 +307,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtspr SPRN_##h##SRR0,r12; \
mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
mtspr SPRN_##h##SRR1,r10; \
- h##rfid; \
+ h##RFI_TO_KERNEL; \
b . /* prevent speculative execution */
#define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h) \
@@ -432,7 +507,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mflr r9; /* Get LR, later save to stack */ \
ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
std r9,_LINK(r1); \
- lbz r10,PACASOFTIRQEN(r13); \
+ lbz r10,PACAIRQSOFTMASK(r13); \
mfspr r11,SPRN_XER; /* save XER in stackframe */ \
std r10,SOFTE(r1); \
std r11,_XER(r1); \
@@ -496,22 +571,23 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL
#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI
#define SOFTEN_VALUE_0xea0 PACA_IRQ_EE
+#define SOFTEN_VALUE_0xf00 PACA_IRQ_PMI
-#define __SOFTEN_TEST(h, vec) \
- lbz r10,PACASOFTIRQEN(r13); \
- cmpwi r10,0; \
+#define __SOFTEN_TEST(h, vec, bitmask) \
+ lbz r10,PACAIRQSOFTMASK(r13); \
+ andi. r10,r10,bitmask; \
li r10,SOFTEN_VALUE_##vec; \
- beq masked_##h##interrupt
+ bne masked_##h##interrupt
-#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec)
+#define _SOFTEN_TEST(h, vec, bitmask) __SOFTEN_TEST(h, vec, bitmask)
-#define SOFTEN_TEST_PR(vec) \
+#define SOFTEN_TEST_PR(vec, bitmask) \
KVMTEST(EXC_STD, vec); \
- _SOFTEN_TEST(EXC_STD, vec)
+ _SOFTEN_TEST(EXC_STD, vec, bitmask)
-#define SOFTEN_TEST_HV(vec) \
+#define SOFTEN_TEST_HV(vec, bitmask) \
KVMTEST(EXC_HV, vec); \
- _SOFTEN_TEST(EXC_HV, vec)
+ _SOFTEN_TEST(EXC_HV, vec, bitmask)
#define KVMTEST_PR(vec) \
KVMTEST(EXC_STD, vec)
@@ -519,53 +595,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define KVMTEST_HV(vec) \
KVMTEST(EXC_HV, vec)
-#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec)
-#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec)
+#define SOFTEN_NOTEST_PR(vec, bitmask) _SOFTEN_TEST(EXC_STD, vec, bitmask)
+#define SOFTEN_NOTEST_HV(vec, bitmask) _SOFTEN_TEST(EXC_HV, vec, bitmask)
-#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
+#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
EXCEPTION_PROLOG_PSERIES_1(label, h);
-#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
+ __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
-#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \
+#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label, bitmask) \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_TEST_PR)
+ EXC_STD, SOFTEN_TEST_PR, bitmask)
-#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec); \
+#define MASKABLE_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\
EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD)
-#define MASKABLE_EXCEPTION_HV(loc, vec, label) \
+#define MASKABLE_EXCEPTION_HV(loc, vec, label, bitmask) \
_MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV, bitmask)
-#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
+#define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
EXCEPTION_PROLOG_PSERIES_1(label, EXC_HV)
-#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
+#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask) \
SET_SCRATCH0(r13); /* save r13 */ \
EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \
EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
-#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
+#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)\
+ __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra, bitmask)
-#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \
+#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label, bitmask) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_NOTEST_PR)
+ EXC_STD, SOFTEN_NOTEST_PR, bitmask)
+
+#define MASKABLE_RELON_EXCEPTION_PSERIES_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\
+ EXCEPTION_PROLOG_PSERIES_1(label, EXC_STD);
-#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
+#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label, bitmask) \
_MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV, bitmask)
-#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
+#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask) \
+ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
/*
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 8f88f771cc55..1e82eb3caabd 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,7 +187,20 @@ label##3: \
FTR_ENTRY_OFFSET label##1b-label##3b; \
.popsection;
+#define RFI_FLUSH_FIXUP_SECTION \
+951: \
+ .pushsection __rfi_flush_fixup,"a"; \
+ .align 2; \
+952: \
+ FTR_ENTRY_OFFSET 951b-952b; \
+ .popsection;
+
+
#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+
void apply_feature_fixups(void);
void setup_feature_keys(void);
#endif
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 832df61f30ef..511acfd7ab0d 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,6 +52,7 @@
#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000400000000)
#ifndef __ASSEMBLY__
@@ -68,7 +69,8 @@ enum {
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
- FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2,
+ FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+ FW_FEATURE_DRC_INFO,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index 0a663dfc28b5..7e0e93f24cb7 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -268,14 +268,14 @@ name:
STD_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
EXC_VIRT_END(name, start, size);
-#define EXC_REAL_MASKABLE(name, start, size) \
+#define EXC_REAL_MASKABLE(name, start, size, bitmask) \
EXC_REAL_BEGIN(name, start, size); \
- MASKABLE_EXCEPTION_PSERIES(start, start, name##_common); \
+ MASKABLE_EXCEPTION_PSERIES(start, start, name##_common, bitmask);\
EXC_REAL_END(name, start, size);
-#define EXC_VIRT_MASKABLE(name, start, size, realvec) \
+#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
EXC_VIRT_BEGIN(name, start, size); \
- MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_PSERIES(start, realvec, name##_common, bitmask);\
EXC_VIRT_END(name, start, size);
#define EXC_REAL_HV(name, start, size) \
@@ -304,13 +304,13 @@ name:
#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
__EXC_REAL_OOL(name, start, size);
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec) \
+#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \
TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common); \
+ MASKABLE_EXCEPTION_PSERIES_OOL(vec, name##_common, bitmask); \
-#define EXC_REAL_OOL_MASKABLE(name, start, size) \
+#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \
__EXC_REAL_OOL_MASKABLE(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE(name, start);
+ __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask);
#define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \
EXC_REAL_BEGIN(name, start, size); \
@@ -331,13 +331,13 @@ name:
#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
__EXC_REAL_OOL(name, start, size);
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec) \
+#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \
TRAMP_REAL_BEGIN(tramp_real_##name); \
- MASKABLE_EXCEPTION_HV_OOL(vec, name##_common); \
+ MASKABLE_EXCEPTION_HV_OOL(vec, name##_common, bitmask); \
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
+#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \
__EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE_HV(name, start);
+ __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask);
#define __EXC_VIRT_OOL(name, start, size) \
EXC_VIRT_BEGIN(name, start, size); \
@@ -355,13 +355,13 @@ name:
#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec) \
+#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \
TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_PSERIES_OOL(realvec, name##_common, bitmask);\
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec) \
+#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \
__EXC_VIRT_OOL_MASKABLE(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE(name, realvec);
+ __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask);
#define __EXC_VIRT_OOL_HV(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
@@ -377,13 +377,13 @@ name:
#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
__EXC_VIRT_OOL(name, start, size);
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec) \
+#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \
TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common); \
+ MASKABLE_RELON_EXCEPTION_HV_OOL(realvec, name##_common, bitmask);\
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec) \
+#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \
__EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec);
+ __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask);
#define TRAMP_KVM(area, n) \
TRAMP_KVM_BEGIN(do_kvm_##n); \
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 85b7a1a21e22..9c14f7b5c46c 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void);
static inline void wait_for_subcore_guest_exit(void) { }
static inline void wait_for_tb_resync(void) { }
#endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index a409177be8bd..eca3f9c68907 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -241,6 +241,7 @@
#define H_GET_HCA_INFO 0x1B8
#define H_GET_PERF_COUNT 0x1BC
#define H_MANAGE_TRACE 0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_QUERY_INT_STATE 0x1E4
#define H_POLL_PENDING 0x1D8
@@ -330,6 +331,17 @@
#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
/* >= 0 values are CPU number */
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
+
/* Flag values used in H_REGISTER_PROC_TBL hcall */
#define PROC_TABLE_OP_MASK 0x18
#define PROC_TABLE_DEREG 0x10
@@ -341,6 +353,7 @@
#define PROC_TABLE_GTSE 0x01
#ifndef __ASSEMBLY__
+#include <linux/types.h>
/**
* plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
@@ -436,6 +449,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
}
}
+struct h_cpu_char_result {
+ u64 character;
+ u64 behaviour;
+};
+
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3818fa0164f0..88e5e8f17e98 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -27,6 +27,15 @@
#define PACA_IRQ_DEC 0x08 /* Or FIT */
#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */
#define PACA_IRQ_HMI 0x20
+#define PACA_IRQ_PMI 0x40
+
+/*
+ * flags for paca->irq_soft_mask
+ */
+#define IRQS_ENABLED 0
+#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */
+#define IRQS_PMI_DISABLED 2
+#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED)
#endif /* CONFIG_PPC64 */
@@ -43,46 +52,112 @@ extern void unknown_exception(struct pt_regs *regs);
#ifdef CONFIG_PPC64
#include <asm/paca.h>
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long irq_soft_mask_return(void)
{
unsigned long flags;
asm volatile(
"lbz %0,%1(13)"
: "=r" (flags)
- : "i" (offsetof(struct paca_struct, soft_enabled)));
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)));
+
+ return flags;
+}
+
+/*
+ * The "memory" clobber acts as both a compiler barrier
+ * for the critical section and as a clobber because
+ * we changed paca->irq_soft_mask
+ */
+static inline notrace void irq_soft_mask_set(unsigned long mask)
+{
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ /*
+ * The irq mask must always include the STD bit if any are set.
+ *
+ * and interrupts don't get replayed until the standard
+ * interrupt (local_irq_disable()) is unmasked.
+ *
+ * Other masks must only provide additional masking beyond
+ * the standard, and they are also not replayed until the
+ * standard interrupt becomes unmasked.
+ *
+ * This could be changed, but it will require partial
+ * unmasks to be replayed, among other things. For now, take
+ * the simple approach.
+ */
+ WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+ asm volatile(
+ "stb %0,%1(13)"
+ :
+ : "r" (mask),
+ "i" (offsetof(struct paca_struct, irq_soft_mask))
+ : "memory");
+}
+
+static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
+{
+ unsigned long flags;
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON(mask && !(mask & IRQS_DISABLED));
+#endif
+
+ asm volatile(
+ "lbz %0,%1(13); stb %2,%1(13)"
+ : "=&r" (flags)
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+ "r" (mask)
+ : "memory");
return flags;
}
-static inline unsigned long arch_local_irq_disable(void)
+static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
{
- unsigned long flags, zero;
+ unsigned long flags, tmp;
asm volatile(
- "li %1,0; lbz %0,%2(13); stb %1,%2(13)"
- : "=r" (flags), "=&r" (zero)
- : "i" (offsetof(struct paca_struct, soft_enabled))
+ "lbz %0,%2(13); or %1,%0,%3; stb %1,%2(13)"
+ : "=&r" (flags), "=r" (tmp)
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)),
+ "r" (mask)
: "memory");
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON((mask | flags) && !((mask | flags) & IRQS_DISABLED));
+#endif
+
return flags;
}
+static inline unsigned long arch_local_save_flags(void)
+{
+ return irq_soft_mask_return();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+ irq_soft_mask_set(IRQS_DISABLED);
+}
+
extern void arch_local_irq_restore(unsigned long);
static inline void arch_local_irq_enable(void)
{
- arch_local_irq_restore(1);
+ arch_local_irq_restore(IRQS_ENABLED);
}
static inline unsigned long arch_local_irq_save(void)
{
- return arch_local_irq_disable();
+ return irq_soft_mask_set_return(IRQS_DISABLED);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
- return flags == 0;
+ return flags & IRQS_DISABLED;
}
static inline bool arch_irqs_disabled(void)
@@ -90,6 +165,55 @@ static inline bool arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * To support disabling and enabling of irq with PMI, set of
+ * new powerpc_local_irq_pmu_save() and powerpc_local_irq_restore()
+ * functions are added. These macros are implemented using generic
+ * linux local_irq_* code from include/linux/irqflags.h.
+ */
+#define raw_local_irq_pmu_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = irq_soft_mask_or_return(IRQS_DISABLED | \
+ IRQS_PMI_DISABLED); \
+ } while(0)
+
+#define raw_local_irq_pmu_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ arch_local_irq_restore(flags); \
+ } while(0)
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ trace_hardirqs_off(); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ if (raw_irqs_disabled_flags(flags)) { \
+ raw_local_irq_pmu_restore(flags); \
+ trace_hardirqs_off(); \
+ } else { \
+ trace_hardirqs_on(); \
+ raw_local_irq_pmu_restore(flags); \
+ } \
+ } while(0)
+#else
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ raw_local_irq_pmu_restore(flags); \
+ } while (0)
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#endif /* CONFIG_PPC_BOOK3S */
+
#ifdef CONFIG_PPC_BOOK3E
#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory")
#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory")
@@ -98,14 +222,13 @@ static inline bool arch_irqs_disabled(void)
#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1)
#endif
-#define hard_irq_disable() do { \
- u8 _was_enabled; \
- __hard_irq_disable(); \
- _was_enabled = local_paca->soft_enabled; \
- local_paca->soft_enabled = 0; \
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
- if (_was_enabled) \
- trace_hardirqs_off(); \
+#define hard_irq_disable() do { \
+ unsigned long flags; \
+ __hard_irq_disable(); \
+ flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
+ if (!arch_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while(0)
static inline bool lazy_irq_pending(void)
@@ -127,7 +250,7 @@ static inline void may_hard_irq_enable(void)
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
- return !regs->softe;
+ return (regs->softe & IRQS_DISABLED);
}
extern bool prep_irq_for_idle(void);
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index fad0e6ff460f..d76cb11be3e3 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -35,6 +35,13 @@
#define THREAD_IMC_ENABLE 0x8000000000000000ULL
/*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET 0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET 8
+#define IMC_CNTL_BLK_MODE_OFFSET 32
+
+/*
* Structure to hold memory address information for imc units.
*/
struct imc_mem_info {
@@ -71,7 +78,7 @@ struct imc_events {
struct imc_pmu {
struct pmu pmu;
struct imc_mem_info *mem_info;
- struct imc_events **events;
+ struct imc_events *events;
/*
* Attribute groups for the PMU. Slot 0 used for
* format attribute, slot 1 used for cpusmask attribute,
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index 1aeb5f13b8c4..1a6c1ce17735 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -47,14 +47,14 @@
* be clobbered.
*/
#define RECONCILE_IRQ_STATE(__rA, __rB) \
- lbz __rA,PACASOFTIRQEN(r13); \
+ lbz __rA,PACAIRQSOFTMASK(r13); \
lbz __rB,PACAIRQHAPPENED(r13); \
- cmpwi cr0,__rA,0; \
- li __rA,0; \
+ andi. __rA,__rA,IRQS_DISABLED; \
+ li __rA,IRQS_DISABLED; \
ori __rB,__rB,PACA_IRQ_HARD_DIS; \
stb __rB,PACAIRQHAPPENED(r13); \
- beq 44f; \
- stb __rA,PACASOFTIRQEN(r13); \
+ bne 44f; \
+ stb __rA,PACAIRQSOFTMASK(r13); \
TRACE_DISABLE_INTS; \
44:
@@ -64,9 +64,9 @@
#define RECONCILE_IRQ_STATE(__rA, __rB) \
lbz __rA,PACAIRQHAPPENED(r13); \
- li __rB,0; \
+ li __rB,IRQS_DISABLED; \
ori __rA,__rA,PACA_IRQ_HARD_DIS; \
- stb __rB,PACASOFTIRQEN(r13); \
+ stb __rB,PACAIRQSOFTMASK(r13); \
stb __rA,PACAIRQHAPPENED(r13)
#endif
#endif
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 941c2a3f231b..9db18287b5f4 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -873,7 +873,7 @@ static inline void kvmppc_fix_ee_before_entry(void)
/* Only need to enable IRQs by hard enabling them after this */
local_paca->irq_happened = 0;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
#endif
}
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index 600a68bd77f5..fdd00939270b 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -2,76 +2,64 @@
#ifndef _ARCH_POWERPC_LOCAL_H
#define _ARCH_POWERPC_LOCAL_H
+#ifdef CONFIG_PPC_BOOK3S_64
+
#include <linux/percpu.h>
#include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
typedef struct
{
- atomic_long_t a;
+ long v;
} local_t;
-#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
-
-#define local_read(l) atomic_long_read(&(l)->a)
-#define local_set(l,i) atomic_long_set(&(l)->a, (i))
+#define LOCAL_INIT(i) { (i) }
-#define local_add(i,l) atomic_long_add((i),(&(l)->a))
-#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
-#define local_inc(l) atomic_long_inc(&(l)->a)
-#define local_dec(l) atomic_long_dec(&(l)->a)
-
-static __inline__ long local_add_return(long a, local_t *l)
+static __inline__ long local_read(local_t *l)
{
- long t;
-
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\
- add %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
+ return READ_ONCE(l->v);
}
-#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
-
-static __inline__ long local_sub_return(long a, local_t *l)
+static __inline__ void local_set(local_t *l, long i)
{
- long t;
+ WRITE_ONCE(l->v, i);
+}
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\
- subf %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
+#define LOCAL_OP(op, c_op) \
+static __inline__ void local_##op(long i, local_t *l) \
+{ \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ l->v c_op i; \
+ powerpc_local_irq_pmu_restore(flags); \
+}
- return t;
+#define LOCAL_OP_RETURN(op, c_op) \
+static __inline__ long local_##op##_return(long a, local_t *l) \
+{ \
+ long t; \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ t = (l->v c_op a); \
+ powerpc_local_irq_pmu_restore(flags); \
+ \
+ return t; \
}
-static __inline__ long local_inc_return(local_t *l)
-{
- long t;
+#define LOCAL_OPS(op, c_op) \
+ LOCAL_OP(op, c_op) \
+ LOCAL_OP_RETURN(op, c_op)
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
- return t;
-}
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+#define local_inc_return(l) local_add_return(1LL, l)
+#define local_inc(l) local_inc_return(l)
/*
* local_inc_and_test - increment and test
@@ -81,28 +69,39 @@ static __inline__ long local_inc_return(local_t *l)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define local_inc_and_test(l) (local_inc_return(l) == 0)
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
-static __inline__ long local_dec_return(local_t *l)
+#define local_dec_return(l) local_sub_return(1LL, l)
+#define local_dec(l) local_dec_return(l)
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
{
long t;
+ unsigned long flags;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ if (t == o)
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
return t;
}
-#define local_cmpxchg(l, o, n) \
- (cmpxchg_local(&((l)->a.counter), (o), (n)))
-#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
+static __inline__ long local_xchg(local_t *l, long n)
+{
+ long t;
+ unsigned long flags;
+
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
+
+ return t;
+}
/**
* local_add_unless - add unless the number is a given value
@@ -115,62 +114,35 @@ static __inline__ long local_dec_return(local_t *l)
*/
static __inline__ int local_add_unless(local_t *l, long a, long u)
{
- long t;
-
- __asm__ __volatile__ (
-"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\
- cmpw 0,%0,%3 \n\
- beq- 2f \n\
- add %0,%2,%0 \n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b \n"
-" subf %0,%2,%0 \n\
-2:"
- : "=&r" (t)
- : "r" (&(l->a.counter)), "r" (a), "r" (u)
- : "cc", "memory");
-
- return t != u;
-}
-
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
-
-#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
-#define local_dec_and_test(l) (local_dec_return((l)) == 0)
-
-/*
- * Atomically test *l and decrement if it is greater than 0.
- * The function returns the old value of *l minus 1.
- */
-static __inline__ long local_dec_if_positive(local_t *l)
-{
- long t;
+ unsigned long flags;
+ int ret = 0;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\
- cmpwi %0,1\n\
- addi %0,%0,-1\n\
- blt- 2f\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- "\n\
-2:" : "=&b" (t)
- : "r" (&(l->a.counter))
- : "cc", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ if (l->v != u) {
+ l->v += a;
+ ret = 1;
+ }
+ powerpc_local_irq_pmu_restore(flags);
- return t;
+ return ret;
}
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
/* Use these for per-cpu local_t variables: on some archs they are
* much more efficient than these naive implementations. Note they take
* a variable, not an address.
*/
-#define __local_inc(l) ((l)->a.counter++)
-#define __local_dec(l) ((l)->a.counter++)
-#define __local_add(i,l) ((l)->a.counter+=(i))
-#define __local_sub(i,l) ((l)->a.counter-=(i))
+#define __local_inc(l) ((l)->v++)
+#define __local_dec(l) ((l)->v++)
+#define __local_add(i,l) ((l)->v+=(i))
+#define __local_sub(i,l) ((l)->v-=(i))
+
+#else /* CONFIG_PPC64 */
+
+#include <asm-generic/local.h>
+
+#endif /* CONFIG_PPC64 */
#endif /* _ARCH_POWERPC_LOCAL_H */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 4298ceae6db5..ffe7c71e1132 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -76,6 +76,7 @@ struct machdep_calls {
void __noreturn (*restart)(char *cmd);
void __noreturn (*halt)(void);
+ void (*panic)(char *str);
void (*cpu_die)(void);
long (*time_init)(void); /* Optional, may be NULL */
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 30922f699341..07e3f54de9e3 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,6 +13,7 @@
#include <asm/cputable.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <asm/cpu_has_feature.h>
/*
@@ -22,13 +23,23 @@
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey)
{
- return (prot & PROT_SAO) ? VM_SAO : 0;
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
+#else
+ return ((prot & PROT_SAO) ? VM_SAO : 0);
+#endif
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
{
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (vm_flags & VM_SAO) ?
+ __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
+ __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
+#else
return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#endif
}
#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 6364f5c2cc3e..bb38312cff28 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -260,6 +260,15 @@ static inline bool early_radix_enabled(void)
}
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+extern u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address);
+#else
+static inline u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#endif /* !__ASSEMBLY__ */
/* The kernel use the constants below to index in the page sizes array.
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6177d43f0ce8..cd2bd739c0df 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -186,11 +186,33 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
+#ifdef CONFIG_PPC_MEM_KEYS
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign);
+#else /* CONFIG_PPC_MEM_KEYS */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
}
+
+#define pkey_mm_init(mm)
+#define thread_pkey_regs_save(thread)
+#define thread_pkey_regs_restore(new_thread, old_thread)
+#define thread_pkey_regs_init(thread)
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ return 0;
+}
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+ return 0x0UL;
+}
+
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h
index 0e23cd4ac8aa..13e6702ec458 100644
--- a/arch/powerpc/include/asm/mpic_timer.h
+++ b/arch/powerpc/include/asm/mpic_timer.h
@@ -29,17 +29,17 @@ struct mpic_timer {
#ifdef CONFIG_MPIC_TIMER
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time);
+ time64_t time);
void mpic_start_timer(struct mpic_timer *handle);
void mpic_stop_timer(struct mpic_timer *handle);
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time);
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time);
void mpic_free_timer(struct mpic_timer *handle);
#else
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time) { return NULL; }
+ time64_t time) { return NULL; }
void mpic_start_timer(struct mpic_timer *handle) { }
void mpic_stop_timer(struct mpic_timer *handle) { }
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { }
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { }
void mpic_free_timer(struct mpic_timer *handle) { }
#endif
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 3892db93b837..1b7fd535393b 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -159,7 +159,7 @@ struct paca_struct {
u64 saved_r1; /* r1 save for RTAS calls or PM */
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
- u8 soft_enabled; /* irq soft-enable flag */
+ u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
@@ -232,6 +232,16 @@ struct paca_struct {
struct sibling_subcore_state *sibling_subcore_state;
#endif
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * rfi fallback flush must be in its own cacheline to prevent
+ * other paca data leaking into the L1d
+ */
+ u64 exrfi[EX_SIZE] __aligned(0x80);
+ void *rfi_flush_fallback_area;
+ u64 l1d_flush_congruence;
+ u64 l1d_flush_sets;
+#endif
};
extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
new file mode 100644
index 000000000000..0409c80c32c0
--- /dev/null
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_KEYS_H
+#define _ASM_POWERPC_KEYS_H
+
+#include <linux/jump_label.h>
+#include <asm/firmware.h>
+
+DECLARE_STATIC_KEY_TRUE(pkey_disabled);
+extern int pkeys_total; /* total pkeys as per device tree */
+extern u32 initial_allocation_mask; /* bits set for reserved keys */
+
+/*
+ * Define these here temporarily so we're not dependent on patching linux/mm.h.
+ * Once it's updated we can drop these.
+ */
+#ifndef VM_PKEY_BIT0
+# define VM_PKEY_SHIFT VM_HIGH_ARCH_BIT_0
+# define VM_PKEY_BIT0 VM_HIGH_ARCH_0
+# define VM_PKEY_BIT1 VM_HIGH_ARCH_1
+# define VM_PKEY_BIT2 VM_HIGH_ARCH_2
+# define VM_PKEY_BIT3 VM_HIGH_ARCH_3
+# define VM_PKEY_BIT4 VM_HIGH_ARCH_4
+#endif
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
+ VM_PKEY_BIT3 | VM_PKEY_BIT4)
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | \
+ PKEY_DISABLE_WRITE | \
+ PKEY_DISABLE_EXECUTE)
+
+static inline u64 pkey_to_vmflag_bits(u16 pkey)
+{
+ return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS);
+}
+
+static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0x0UL;
+
+ return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL));
+}
+
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+ return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+}
+
+#define arch_max_pkey() pkeys_total
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+ return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL));
+}
+
+static inline u16 pte_to_pkey_bits(u64 pteflags)
+{
+ return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL));
+}
+
+#define pkey_alloc_mask(pkey) (0x1 << pkey)
+
+#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
+
+#define __mm_pkey_allocated(mm, pkey) { \
+ mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_free(mm, pkey) { \
+ mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_is_allocated(mm, pkey) \
+ (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey))
+
+#define __mm_pkey_is_reserved(pkey) (initial_allocation_mask & \
+ pkey_alloc_mask(pkey))
+
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+ /* A reserved key is never considered as 'explicitly allocated' */
+ return ((pkey < arch_max_pkey()) &&
+ !__mm_pkey_is_reserved(pkey) &&
+ __mm_pkey_is_allocated(mm, pkey));
+}
+
+extern void __arch_activate_pkey(int pkey);
+extern void __arch_deactivate_pkey(int pkey);
+/*
+ * Returns a positive, 5-bit key on success, or -1 on failure.
+ * Relies on the mmap_sem to protect against concurrency in mm_pkey_alloc() and
+ * mm_pkey_free().
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+ /*
+ * Note: this is the one and only place we make sure that the pkey is
+ * valid as far as the hardware is concerned. The rest of the kernel
+ * trusts that only good, valid pkeys come out of here.
+ */
+ u32 all_pkeys_mask = (u32)(~(0x0));
+ int ret;
+
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ /*
+ * Are we out of pkeys? We must handle this specially because ffz()
+ * behavior is undefined if there are no zeros.
+ */
+ if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
+ return -1;
+
+ ret = ffz((u32)mm_pkey_allocation_map(mm));
+ __mm_pkey_allocated(mm, ret);
+
+ /*
+ * Enable the key in the hardware
+ */
+ if (ret > 0)
+ __arch_activate_pkey(ret);
+ return ret;
+}
+
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ if (!mm_pkey_is_allocated(mm, pkey))
+ return -EINVAL;
+
+ /*
+ * Disable the key in the hardware
+ */
+ __arch_deactivate_pkey(pkey);
+ __mm_pkey_free(mm, pkey);
+
+ return 0;
+}
+
+/*
+ * Try to dedicate one of the protection keys to be used as an
+ * execute-only protection key.
+ */
+extern int __execute_only_pkey(struct mm_struct *mm);
+static inline int execute_only_pkey(struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -1;
+
+ return __execute_only_pkey(mm);
+}
+
+extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey);
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+
+ /*
+ * Is this an mprotect_pkey() call? If so, never override the value that
+ * came from the user.
+ */
+ if (pkey != -1)
+ return pkey;
+
+ return __arch_override_mprotect_pkey(vma, prot, pkey);
+}
+
+extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return -EINVAL;
+ return __arch_set_user_pkey_access(tsk, pkey, init_val);
+}
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return !static_branch_likely(&pkey_disabled);
+}
+
+extern void pkey_mm_init(struct mm_struct *mm);
+extern bool arch_supports_pkeys(int cap);
+extern unsigned int arch_usable_pkeys(void);
+extern void thread_pkey_regs_save(struct thread_struct *thread);
+extern void thread_pkey_regs_restore(struct thread_struct *new_thread,
+ struct thread_struct *old_thread);
+extern void thread_pkey_regs_init(struct thread_struct *thread);
+#endif /*_ASM_POWERPC_KEYS_H */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 7f01b22fa6cb..55eddf50d149 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)
return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
}
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+ if (rc == H_SUCCESS) {
+ p->character = retbuf[0];
+ p->behaviour = retbuf[1];
+ }
+
+ return rc;
+}
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index bdab3b74eb98..01299cdc9806 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -309,6 +309,11 @@ struct thread_struct {
struct thread_vr_state ckvr_state; /* Checkpointed VR state */
unsigned long ckvrsave; /* Checkpointed VRSAVE */
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_MEM_KEYS
+ unsigned long amr;
+ unsigned long iamr;
+ unsigned long uamor;
+#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
#endif /* CONFIG_KVM_BOOK3S_32_HANDLER */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 9f27866e3126..b04c5ce8191b 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -80,6 +80,21 @@ extern void of_instantiate_rtc(void);
extern int of_get_ibm_chip_id(struct device_node *np);
+struct of_drc_info {
+ char *drc_type;
+ char *drc_name_prefix;
+ u32 drc_index_start;
+ u32 drc_name_suffix_start;
+ u32 num_sequential_elems;
+ u32 sequential_inc;
+ u32 drc_power_domain;
+ u32 last_drc_index;
+};
+
+extern int of_read_drc_info_cell(struct property **prop,
+ const __be32 **curval, struct of_drc_info *data);
+
+
/*
* There are two methods for telling firmware what our capabilities are.
* Newer machines have an "ibm,client-architecture-support" method on the
@@ -160,6 +175,7 @@ extern int of_get_ibm_chip_id(struct device_node *np);
#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */
/* Radix Table Extensions */
#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */
+#define OV5_DRC_INFO 0x1640 /* Redef Prop Structures: drc-info */
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3ccd412..e6c7eadf6bce 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -312,7 +312,6 @@
DSISR_BAD_EXT_CTRL)
#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \
DSISR_ATTR_CONFLICT | \
- DSISR_KEYFAULT | \
DSISR_UNSUPP_MMU | \
DSISR_PRTABLE_FAULT | \
DSISR_ICSWX_NO_CT | \
@@ -432,8 +431,9 @@
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
-#define SPRN_HMER 0x150 /* Hardware m? error recovery */
-#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
+#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
+#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
+#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 257d23dbf55d..469b7fdc9be4 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long);
void check_for_initrd(void);
void initmem_init(void);
+void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
#ifdef CONFIG_PPC_PSERIES
@@ -38,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}
static inline void pseries_little_endian_exceptions(void) {}
#endif /* CONFIG_PPC_PSERIES */
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+ L1D_FLUSH_NONE = 0x1,
+ L1D_FLUSH_FALLBACK = 0x2,
+ L1D_FLUSH_ORI = 0x4,
+ L1D_FLUSH_MTTRIG = 0x8,
+};
+
+void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 449912f057f6..d61f9c96d916 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -389,3 +389,6 @@ COMPAT_SYS_SPU(preadv2)
COMPAT_SYS_SPU(pwritev2)
SYSCALL(kexec_file_load)
SYSCALL(statx)
+SYSCALL(pkey_alloc)
+SYSCALL(pkey_free)
+SYSCALL(pkey_mprotect)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 9ba11dbcaca9..daf1ba97a00c 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,14 +12,10 @@
#include <uapi/asm/unistd.h>
-#define NR_syscalls 384
+#define NR_syscalls 387
#define __NR__exit __NR_exit
-#define __IGNORE_pkey_mprotect
-#define __IGNORE_pkey_alloc
-#define __IGNORE_pkey_free
-
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -10,6 +10,41 @@
#define _ASM_POWERPC_XIVE_REGS_H
/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
+#define XIVE_ESB_GET 0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P 0x2
+#define XIVE_ESB_VAL_Q 0x1
+
+/*
* Thread Management (aka "TM") registers
*/
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..b619a5585cd6 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
#define XIVE_IRQ_FLAG_EOI_FW 0x10
#define XIVE_IRQ_FLAG_H_INT_ESB 0x20
+/* Special flag set by KVM for excalation interrupts */
+#define XIVE_IRQ_NO_EOI 0x80
+
#define XIVE_INVALID_CHIP_ID -1
/* A queue tracking structure in a CPU */
@@ -72,41 +75,6 @@ struct xive_q {
atomic_t pending_count;
};
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI 0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
-#define XIVE_ESB_GET 0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P 0x2
-#define XIVE_ESB_VAL_Q 0x1
-
/* Global enable flags for the XIVE support */
extern bool __xive_enabled;
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 5f201d40bcca..860c59291bfc 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -97,6 +97,7 @@
#define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */
#define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */
#define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */
+#define ELF_NPKEY 3 /* includes amr, iamr, uamor */
typedef unsigned long elf_greg_t64;
typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index e63bc37e33af..65065ce32814 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -30,4 +30,10 @@
#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE |\
+ PKEY_DISABLE_EXECUTE)
#endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index df8684f31919..389c36fd8299 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -395,5 +395,8 @@
#define __NR_pwritev2 381
#define __NR_kexec_file_load 382
#define __NR_statx 383
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+#define __NR_pkey_mprotect 386
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 3f6316bcde4e..fa5c4125f42a 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -178,7 +178,7 @@ int main(void)
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
- OFFSET(PACASOFTIRQEN, paca_struct, soft_enabled);
+ OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
#ifdef CONFIG_PPC_BOOK3S
OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
@@ -237,6 +237,11 @@ int main(void)
OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
OFFSET(PACA_IN_MCE, paca_struct, in_mce);
OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+ OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+ OFFSET(PACA_EXRFI, paca_struct, exrfi);
+ OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence);
+ OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets);
+
#endif
OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 730ade48329b..3f30c994e931 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -98,6 +98,7 @@ _GLOBAL(__setup_cpu_power9)
li r0,0
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
@@ -121,6 +122,7 @@ _GLOBAL(__restore_cpu_power9)
li r0,0
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 371c05fe250a..2cb5109a7ea3 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -37,6 +37,11 @@
#include <asm/tm.h>
#include <asm/ppc-opcode.h>
#include <asm/export.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
/*
* System calls.
@@ -128,10 +133,9 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
* of irq tracing is used, we additionally check that condition
* is correct
*/
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
- lbz r10,PACASOFTIRQEN(r13)
- xori r10,r10,1
-1: tdnei r10,0
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
+ lbz r10,PACAIRQSOFTMASK(r13)
+1: tdnei r10,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
@@ -147,7 +151,7 @@ system_call: /* label this so stack traces look sane */
/* We do need to set SOFTE in the stack frame or the return
* from interrupt will be painful
*/
- li r10,1
+ li r10,IRQS_ENABLED
std r10,SOFTE(r1)
CURRENT_THREAD_INFO(r11, r1)
@@ -262,13 +266,23 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
+ ld r2,GPR2(r1)
+ ld r1,GPR1(r1)
+ mtlr r4
+ mtcr r5
+ mtspr SPRN_SRR0,r7
+ mtspr SPRN_SRR1,r8
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+ /* exit to kernel */
1: ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
.Lsyscall_error:
@@ -397,8 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtmsrd r10, 1
mtspr SPRN_SRR0, r11
mtspr SPRN_SRR1, r12
-
- rfid
+ RFI_TO_USER
b . /* prevent speculative execution */
#endif
_ASM_NOKPROBE_SYMBOL(system_call_common);
@@ -741,10 +754,10 @@ resume_kernel:
beq+ restore
/* Check that preempt_count() == 0 and interrupts are enabled */
lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
+ cmpwi cr0,r8,0
+ bne restore
ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
+ andi. r0,r0,IRQS_DISABLED
bne restore
/*
@@ -782,12 +795,12 @@ restore:
* are about to re-enable interrupts
*/
ld r5,SOFTE(r1)
- lbz r6,PACASOFTIRQEN(r13)
- cmpwi cr0,r5,0
- beq .Lrestore_irq_off
+ lbz r6,PACAIRQSOFTMASK(r13)
+ andi. r5,r5,IRQS_DISABLED
+ bne .Lrestore_irq_off
/* We are enabling, were we already enabled ? Yes, just return */
- cmpwi cr0,r6,1
+ andi. r6,r6,IRQS_DISABLED
beq cr0,.Ldo_restore
/*
@@ -806,8 +819,8 @@ restore:
*/
.Lrestore_no_replay:
TRACE_ENABLE_INTS
- li r0,1
- stb r0,PACASOFTIRQEN(r13);
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13);
/*
* Final return path. BookE is handled in a different file
@@ -878,7 +891,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
-1:
+
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
@@ -891,8 +904,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r3,GPR3(r1)
ld r4,GPR4(r1)
ld r1,GPR1(r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
- rfid
+1: mtspr SPRN_SRR1,r3
+
+ ld r2,_CCR(r1)
+ mtcrf 0xFF,r2
+ ld r2,_NIP(r1)
+ mtspr SPRN_SRR0,r2
+
+ ld r0,GPR0(r1)
+ ld r2,GPR2(r1)
+ ld r3,GPR3(r1)
+ ld r4,GPR4(r1)
+ ld r1,GPR1(r1)
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
#endif /* CONFIG_PPC_BOOK3E */
@@ -912,10 +939,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
stb r7,PACAIRQHAPPENED(r13)
1:
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
+#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
/* The interrupt should not have soft enabled. */
- lbz r7,PACASOFTIRQEN(r13)
-1: tdnei r7,0
+ lbz r7,PACAIRQSOFTMASK(r13)
+1: tdeqi r7,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
b .Ldo_restore
@@ -955,6 +982,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
addi r3,r1,STACK_FRAME_OVERHEAD;
bl do_IRQ
b ret_from_except
+1: cmpwi cr0,r3,0xf00
+ bne 1f
+ addi r3,r1,STACK_FRAME_OVERHEAD;
+ bl performance_monitor_exception
+ b ret_from_except
1: cmpwi cr0,r3,0xe60
bne 1f
addi r3,r1,STACK_FRAME_OVERHEAD;
@@ -1031,15 +1063,15 @@ _GLOBAL(enter_rtas)
li r0,0
mtcr r0
-#ifdef CONFIG_BUG
+#ifdef CONFIG_BUG
/* There is no way it is acceptable to get here with interrupts enabled,
* check it with the asm equivalent of WARN_ON
*/
- lbz r0,PACASOFTIRQEN(r13)
-1: tdnei r0,0
+ lbz r0,PACAIRQSOFTMASK(r13)
+1: tdeqi r0,IRQS_ENABLED
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
#endif
-
+
/* Hard-disable interrupts */
mfmsr r6
rldicl r7,r6,48,1
@@ -1077,7 +1109,7 @@ __enter_rtas:
mtspr SPRN_SRR0,r5
mtspr SPRN_SRR1,r6
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
rtas_return_loc:
@@ -1107,7 +1139,7 @@ rtas_return_loc:
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
_ASM_NOKPROBE_SYMBOL(__enter_rtas)
_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
@@ -1180,7 +1212,7 @@ _GLOBAL(enter_prom)
LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
andc r11,r11,r12
mtsrr1 r11
- rfid
+ RFI_TO_KERNEL
#endif /* CONFIG_PPC_BOOK3E */
1: /* Return from OF */
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index acd8ca76233e..ee832d344a5a 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -139,7 +139,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r10,SPRN_ESR
SPECIAL_EXC_STORE(r10,ESR)
- lbz r10,PACASOFTIRQEN(r13)
+ lbz r10,PACAIRQSOFTMASK(r13)
SPECIAL_EXC_STORE(r10,SOFTE)
ld r10,_NIP(r1)
SPECIAL_EXC_STORE(r10,CSRR0)
@@ -206,17 +206,17 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- lbz r6,PACASOFTIRQEN(r13)
+ lbz r6,PACAIRQSOFTMASK(r13)
ld r5,SOFTE(r1)
/* Interrupts had better not already be enabled... */
- twnei r6,0
+ tweqi r6,IRQS_ENABLED
- cmpwi cr0,r5,0
- beq 1f
+ andi. r6,r5,IRQS_DISABLED
+ bne 1f
TRACE_ENABLE_INTS
- stb r5,PACASOFTIRQEN(r13)
+ stb r5,PACAIRQSOFTMASK(r13)
1:
/*
* Restore PACAIRQHAPPENED rather than setting it based on
@@ -351,9 +351,9 @@ ret_from_mc_except:
#define PROLOG_ADDITION_NONE_MC(n)
#define PROLOG_ADDITION_MASKABLE_GEN(n) \
- lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
- cmpwi cr0,r10,0; /* yes -> go out of line */ \
- beq masked_interrupt_book3e_##n
+ lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \
+ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
+ bne masked_interrupt_book3e_##n
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
@@ -397,7 +397,7 @@ exc_##n##_common: \
mfspr r8,SPRN_XER; /* save XER in stackframe */ \
ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
- lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
+ lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \
ld r12,exception_marker@toc(r2); \
li r0,0; \
std r3,GPR10(r1); /* save r10 to stackframe */ \
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 175891c6909c..9e6882bdc526 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -256,7 +256,7 @@ BEGIN_FTR_SECTION
LOAD_HANDLER(r12, machine_check_handle_early)
1: mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r11
- rfid
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
2:
/* Stack overflow. Stay on emergency stack and panic.
@@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
li r3,MSR_ME
andc r10,r10,r3 /* Turn off MSR_ME */
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
2:
/*
@@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
*/
bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
- rfid
+ RFI_TO_USER_OR_KERNEL
9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
@@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+ andi. r9,r11,MSR_PR // Check for exception from userspace
+ cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
+
/*
* Test MSR_RI before calling slb_allocate_realmode, because the
* MSR in r11 gets clobbered. However we still want to allocate
@@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/* All done -- return from exception. */
+ bne cr4,1f /* returning to kernel */
+
.machine push
.machine "power4"
mtcrf 0x80,r9
+ mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
@@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
ld r11,PACA_EXSLB+EX_R11(r13)
ld r12,PACA_EXSLB+EX_R12(r13)
ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+1:
+.machine push
+.machine "power4"
+ mtcrf 0x80,r9
+ mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
+ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
+ mtcrf 0x02,r9 /* I/D indication is in cr6 */
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+
+ RESTORE_CTR(r9, PACA_EXSLB)
+ RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ mr r3,r12
+ ld r9,PACA_EXSLB+EX_R9(r13)
+ ld r10,PACA_EXSLB+EX_R10(r13)
+ ld r11,PACA_EXSLB+EX_R11(r13)
+ ld r12,PACA_EXSLB+EX_R12(r13)
+ ld r13,PACA_EXSLB+EX_R13(r13)
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
+
2: std r3,PACA_EXSLB+EX_DAR(r13)
mr r3,r12
mfspr r11,SPRN_SRR0
@@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
8: std r3,PACA_EXSLB+EX_DAR(r13)
@@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
- rfid
+ RFI_TO_KERNEL
b .
EXC_COMMON_BEGIN(unrecov_slb)
@@ -691,10 +718,12 @@ EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
hardware_interrupt_hv:
BEGIN_FTR_SECTION
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_HV, SOFTEN_TEST_HV)
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
- EXC_STD, SOFTEN_TEST_PR)
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
@@ -702,9 +731,13 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
.globl hardware_interrupt_relon_hv;
hardware_interrupt_relon_hv:
BEGIN_FTR_SECTION
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_HV, SOFTEN_TEST_HV,
+ IRQS_DISABLED)
FTR_SECTION_ELSE
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR)
+ _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt_common,
+ EXC_STD, SOFTEN_TEST_PR,
+ IRQS_DISABLED)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
@@ -800,8 +833,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900)
+EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -812,8 +845,8 @@ TRAMP_KVM_HV(PACA_EXGEN, 0x980)
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00)
+EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
+EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xa00)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
@@ -901,7 +934,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
mtspr SPRN_SRR0,r10 ; \
ld r10,PACAKMSR(r13) ; \
mtspr SPRN_SRR1,r10 ; \
- rfid ; \
+ RFI_TO_KERNEL ; \
b . ; /* prevent speculative execution */
#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
@@ -917,7 +950,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
xori r12,r12,MSR_LE ; \
mtspr SPRN_SRR1,r12 ; \
mr r13,r9 ; \
- rfid ; /* return to userspace */ \
+ RFI_TO_USER ; /* return to userspace */ \
b . ; /* prevent speculative execution */
#else
#define SYSCALL_FASTENDIAN_TEST
@@ -1025,7 +1058,7 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* mode.
*/
__EXC_REAL_OOL_HV_DIRECT(hmi_exception, 0xe60, 0x20, hmi_exception_early)
-__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60)
+__TRAMP_REAL_OOL_MASKABLE_HV(hmi_exception, 0xe60, IRQS_DISABLED)
EXC_VIRT_NONE(0x4e60, 0x20)
TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
TRAMP_REAL_BEGIN(hmi_exception_early)
@@ -1063,7 +1096,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
mtcr r11
REST_GPR(11, r1)
ld r1,GPR1(r1)
- hrfid
+ HRFI_TO_USER_OR_KERNEL
1: mtcr r11
REST_GPR(11, r1)
@@ -1083,8 +1116,8 @@ EXC_COMMON_BEGIN(hmi_exception_common)
EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80)
+EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
@@ -1093,8 +1126,8 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0)
+EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
+EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1105,8 +1138,8 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL(performance_monitor, 0xf00, 0x20)
-EXC_VIRT_OOL(performance_monitor, 0x4f00, 0x20, 0xf00)
+EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
+EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0xf00)
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
@@ -1314,7 +1347,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r11,PACA_EXGEN+EX_R11(r13)
ld r12,PACA_EXGEN+EX_R12(r13)
ld r13,PACA_EXGEN+EX_R13(r13)
- HRFID
+ HRFI_TO_UNKNOWN
b .
#endif
@@ -1418,10 +1451,94 @@ masked_##_H##interrupt: \
ld r10,PACA_EXGEN+EX_R10(r13); \
ld r11,PACA_EXGEN+EX_R11(r13); \
/* returns to kernel where r13 must be set up, so don't restore it */ \
- ##_H##rfid; \
+ ##_H##RFI_TO_KERNEL; \
b .; \
MASKED_DEC_HANDLER(_H)
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ std r12,PACA_EXRFI+EX_R12(r13)
+ std r8,PACA_EXRFI+EX_R13(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SETS(r13)
+ ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+ addi r12,r12,8
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+1: li r8,0
+ .rept 8 /* 8-way set associative */
+ ldx r11,r10,r8
+ add r8,r8,r12
+ xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
+ add r8,r8,r11 // Add 0, this creates a dependency on the ldx
+ .endr
+ addi r10,r10,128 /* 128 byte cache line */
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r12,PACA_EXRFI+EX_R12(r13)
+ ld r8,PACA_EXRFI+EX_R13(r13)
+ GET_SCRATCH0(r13);
+ rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ std r12,PACA_EXRFI+EX_R12(r13)
+ std r8,PACA_EXRFI+EX_R13(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SETS(r13)
+ ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13)
+ /*
+ * The load adresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+ addi r12,r12,8
+ mtctr r11
+ DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+1: li r8,0
+ .rept 8 /* 8-way set associative */
+ ldx r11,r10,r8
+ add r8,r8,r12
+ xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not
+ add r8,r8,r11 // Add 0, this creates a dependency on the ldx
+ .endr
+ addi r10,r10,128 /* 128 byte cache line */
+ bdnz 1b
+
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r12,PACA_EXRFI+EX_R12(r13)
+ ld r8,PACA_EXRFI+EX_R13(r13)
+ GET_SCRATCH0(r13);
+ hrfid
+
/*
* Real mode exceptions actually use this too, but alternate
* instruction code patches (which end up in the common .text area)
@@ -1441,7 +1558,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)
addi r13, r13, 4
mtspr SPRN_SRR0, r13
GET_SCRATCH0(r13)
- rfid
+ RFI_TO_KERNEL
b .
TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
@@ -1453,7 +1570,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)
addi r13, r13, 4
mtspr SPRN_HSRR0, r13
GET_SCRATCH0(r13)
- hrfid
+ HRFI_TO_KERNEL
b .
#endif
@@ -1521,7 +1638,7 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
- lis r0,(DSISR_BAD_FAULT_64S|DSISR_DABRMATCH)@h
+ lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
and. r0,r4,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
@@ -1717,6 +1834,8 @@ BEGIN_FTR_SECTION
FTR_SECTION_ELSE
beq hardware_interrupt_common
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_300)
+ cmpwi r3,0xf00
+ beq performance_monitor_common
BEGIN_FTR_SECTION
cmpwi r3,0xa00
beq h_doorbell_common_msgclr
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 04ea5c04fd24..3c2c2688918f 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1462,25 +1462,6 @@ static void fadump_init_files(void)
return;
}
-static int fadump_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- /*
- * If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything
- * else. If this returns, then fadump was not registered, so
- * go through the rest of the panic path.
- */
- crash_fadump(NULL, ptr);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block fadump_panic_block = {
- .notifier_call = fadump_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
-};
-
/*
* Prepare for firmware-assisted dump.
*/
@@ -1513,9 +1494,6 @@ int __init setup_fadump(void)
init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
fadump_init_files();
- atomic_notifier_chain_register(&panic_notifier_list,
- &fadump_panic_block);
-
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index aa71a90f5222..a61151a6ea5e 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -765,8 +765,8 @@ _GLOBAL(pmac_secondary_start)
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -822,7 +822,8 @@ __secondary_start:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- stb r7,PACASOFTIRQEN(r13)
+ li r7,IRQS_DISABLED
+ stb r7,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -988,8 +989,8 @@ start_here_common:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_book3e.S
index 48c21acef915..2b269315d377 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_book3e.S
@@ -17,6 +17,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/epapr_hcalls.h>
+#include <asm/hw_irq.h>
/* 64-bit version only for now */
#ifdef CONFIG_PPC64
@@ -46,8 +47,8 @@ _GLOBAL(\name)
bl trace_hardirqs_on
addi r1,r1,128
#endif
- li r0,1
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13)
/* Interrupts will make use return to LR, so get something we want
* in there
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
index f57a19348bdd..08faa93755f9 100644
--- a/arch/powerpc/kernel/idle_power4.S
+++ b/arch/powerpc/kernel/idle_power4.S
@@ -15,6 +15,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/irqflags.h>
+#include <asm/hw_irq.h>
#undef DEBUG
@@ -53,8 +54,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
mfmsr r7
#endif /* CONFIG_TRACE_IRQFLAGS */
- li r0,1
- stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13) /* we'll hard-enable shortly */
BEGIN_FTR_SECTION
DSSALL
sync
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index b7a84522e652..f88038847790 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -67,6 +67,7 @@
#include <asm/smp.h>
#include <asm/livepatch.h>
#include <asm/asm-prototypes.h>
+#include <asm/hw_irq.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -106,12 +107,6 @@ static inline notrace unsigned long get_irq_happened(void)
return happened;
}
-static inline notrace void set_soft_enabled(unsigned long enable)
-{
- __asm__ __volatile__("stb %0,%1(13)"
- : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
-}
-
static inline notrace int decrementer_check_overflow(void)
{
u64 now = get_tb_or_rtc();
@@ -191,6 +186,11 @@ notrace unsigned int __check_irq_replay(void)
return 0x900;
}
+ if (happened & PACA_IRQ_PMI) {
+ local_paca->irq_happened &= ~PACA_IRQ_PMI;
+ return 0xf00;
+ }
+
if (happened & PACA_IRQ_EE) {
local_paca->irq_happened &= ~PACA_IRQ_EE;
return 0x500;
@@ -224,15 +224,16 @@ notrace unsigned int __check_irq_replay(void)
return 0;
}
-notrace void arch_local_irq_restore(unsigned long en)
+notrace void arch_local_irq_restore(unsigned long mask)
{
unsigned char irq_happened;
unsigned int replay;
/* Write the new soft-enabled value */
- set_soft_enabled(en);
- if (!en)
+ irq_soft_mask_set(mask);
+ if (mask)
return;
+
/*
* From this point onward, we can take interrupts, preempt,
* etc... unless we got hard-disabled. We check if an event
@@ -263,7 +264,7 @@ notrace void arch_local_irq_restore(unsigned long en)
*/
if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
__hard_irq_disable();
-#ifdef CONFIG_TRACE_IRQFLAGS
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
else {
/*
* We should already be hard disabled here. We had bugs
@@ -274,9 +275,9 @@ notrace void arch_local_irq_restore(unsigned long en)
if (WARN_ON(mfmsr() & MSR_EE))
__hard_irq_disable();
}
-#endif /* CONFIG_TRACE_IRQFLAGS */
+#endif
- set_soft_enabled(0);
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
trace_hardirqs_off();
/*
@@ -288,7 +289,7 @@ notrace void arch_local_irq_restore(unsigned long en)
/* We can soft-enable now */
trace_hardirqs_on();
- set_soft_enabled(1);
+ irq_soft_mask_set(IRQS_ENABLED);
/*
* And replay if we have to. This will return with interrupts
@@ -363,7 +364,7 @@ bool prep_irq_for_idle(void)
* of entering the low power state.
*/
local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
/* Tell the caller to enter the low power state */
return true;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 742e4658c5dc..d2fecaec4fec 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs)
return handled;
}
-long hmi_exception_realmode(struct pt_regs *regs)
+/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */
+static enum {
+ DTRIG_UNKNOWN,
+ DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
+ DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+static int init_debug_trig_function(void)
{
- __this_cpu_inc(irq_stat.hmi_exceptions);
-
-#ifdef CONFIG_PPC_BOOK3S_64
- /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */
- if (pvr_version_is(PVR_POWER9)) {
- unsigned long hmer = mfspr(SPRN_HMER);
-
- /* Do we have the debug bit set */
- if (hmer & PPC_BIT(17)) {
- hmer &= ~PPC_BIT(17);
- mtspr(SPRN_HMER, hmer);
-
- /*
- * Now to avoid problems with soft-disable we
- * only do the emulation if we are coming from
- * user space
- */
- if (user_mode(regs))
- local_paca->hmi_p9_special_emu = 1;
-
- /*
- * Don't bother going to OPAL if that's the
- * only relevant bit.
- */
- if (!(hmer & mfspr(SPRN_HMEER)))
- return local_paca->hmi_p9_special_emu;
+ int pvr;
+ struct device_node *cpun;
+ struct property *prop = NULL;
+ const char *str;
+
+ /* First look in the device tree */
+ preempt_disable();
+ cpun = of_get_cpu_node(smp_processor_id(), NULL);
+ if (cpun) {
+ of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+ prop, str) {
+ if (strcmp(str, "bit17-vector-ci-load") == 0)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
}
+ of_node_put(cpun);
+ }
+ preempt_enable();
+
+ /* If we found the property, don't look at PVR */
+ if (prop)
+ goto out;
+
+ pvr = mfspr(SPRN_PVR);
+ /* Check for POWER9 Nimbus (scale-out) */
+ if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+ /* DD2.2 and later */
+ if ((pvr & 0xfff) >= 0x202)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+ /* DD2.0 and DD2.1 - used for vector CI load emulation */
+ else if ((pvr & 0xfff) >= 0x200)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ }
+
+ out:
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ pr_debug("HMI debug trigger used for vector CI load\n");
+ break;
+ case DTRIG_SUSPEND_ESCAPE:
+ pr_debug("HMI debug trigger used for TM suspend escape\n");
+ break;
+ default:
+ break;
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
+ return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ * 0 means no further handling is required
+ * 1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+ unsigned long hmer = mfspr(SPRN_HMER);
+ long ret = 0;
+
+ /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+ if (!((hmer & HMER_DEBUG_TRIG)
+ && hmer_debug_trig_function != DTRIG_UNKNOWN))
+ return -1;
+
+ hmer &= ~HMER_DEBUG_TRIG;
+ /* HMER is a write-AND register */
+ mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * host user space
+ */
+ if (regs && user_mode(regs))
+ ret = local_paca->hmi_p9_special_emu = 1;
+
+ break;
+
+ default:
+ break;
+ }
+
+ /*
+ * See if any other HMI causes remain to be handled
+ */
+ if (hmer & mfspr(SPRN_HMEER))
+ return -1;
+
+ return ret;
+}
+
+/*
+ * Return values:
+ */
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+ int ret;
+
+ __this_cpu_inc(irq_stat.hmi_exceptions);
+
+ ret = hmi_handle_debugtrig(regs);
+ if (ret >= 0)
+ return ret;
wait_for_subcore_guest_exit();
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 52fc864cdec4..98a3aeeb3c8c 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -58,7 +58,7 @@ optprobe_template_entry:
std r5,_XER(r1)
mfcr r5
std r5,_CCR(r1)
- lbz r5,PACASOFTIRQEN(r13)
+ lbz r5,PACAIRQSOFTMASK(r13)
std r5,SOFTE(r1)
/*
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bcd4441304a5..2fd01c9045ba 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -42,6 +42,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/uaccess.h>
#include <linux/elf-randomize.h>
+#include <linux/pkeys.h>
#include <asm/pgtable.h>
#include <asm/io.h>
@@ -57,6 +58,7 @@
#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
+#include <asm/hw_irq.h>
#endif
#include <asm/code-patching.h>
#include <asm/exec.h>
@@ -1102,6 +1104,8 @@ static inline void save_sprs(struct thread_struct *t)
t->tar = mfspr(SPRN_TAR);
}
#endif
+
+ thread_pkey_regs_save(t);
}
static inline void restore_sprs(struct thread_struct *old_thread,
@@ -1141,6 +1145,8 @@ static inline void restore_sprs(struct thread_struct *old_thread,
old_thread->tidr != new_thread->tidr)
mtspr(SPRN_TIDR, new_thread->tidr);
#endif
+
+ thread_pkey_regs_restore(new_thread, old_thread);
}
#ifdef CONFIG_PPC_BOOK3S_64
@@ -1403,7 +1409,7 @@ void show_regs(struct pt_regs * regs)
printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
- printk("REGS: %p TRAP: %04lx %s (%s)\n",
+ printk("REGS: %px TRAP: %04lx %s (%s)\n",
regs, regs->trap, print_tainted(), init_utsname()->release);
printk("MSR: "REG" ", regs->msr);
print_msr_bits(regs->msr);
@@ -1589,6 +1595,7 @@ int set_thread_tidr(struct task_struct *t)
return 0;
}
+EXPORT_SYMBOL_GPL(set_thread_tidr);
#endif /* CONFIG_PPC64 */
@@ -1674,7 +1681,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childregs->gpr[14] = ppc_function_entry((void *)usp);
#ifdef CONFIG_PPC64
clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = 1;
+ childregs->softe = IRQS_ENABLED;
#endif
childregs->gpr[15] = kthread_arg;
p->thread.regs = NULL; /* no user register state */
@@ -1865,6 +1872,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+ thread_pkey_regs_init(&current->thread);
}
EXPORT_SYMBOL(start_thread);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index acf4b2e0530c..adf044daafd7 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -874,6 +874,7 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
.mmu = 0,
.hash_ext = 0,
.radix_ext = 0,
+ .byte22 = OV5_FEAT(OV5_DRC_INFO),
},
/* option vector 6: IBM PAPR hints */
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f52ad5bb7109..ca72d7391d40 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -35,6 +35,7 @@
#include <linux/context_tracking.h>
#include <linux/uaccess.h>
+#include <linux/pkeys.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
@@ -283,6 +284,18 @@ int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
if (regno == PT_DSCR)
return get_user_dscr(task, data);
+#ifdef CONFIG_PPC64
+ /*
+ * softe copies paca->irq_soft_mask variable state. Since irq_soft_mask is
+ * no more used as a flag, lets force usr to alway see the softe value as 1
+ * which means interrupts are not soft disabled.
+ */
+ if (regno == PT_SOFTE) {
+ *data = 1;
+ return 0;
+ }
+#endif
+
if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
@@ -1775,6 +1788,61 @@ static int pmu_set(struct task_struct *target,
return ret;
}
#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+static int pkey_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return regset->n;
+}
+
+static int pkey_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
+ BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &target->thread.amr, 0,
+ ELF_NPKEY * sizeof(unsigned long));
+}
+
+static int pkey_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 new_amr;
+ int ret;
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ /* Only the AMR can be set from userspace */
+ if (pos != 0 || count != sizeof(new_amr))
+ return -EINVAL;
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &new_amr, 0, sizeof(new_amr));
+ if (ret)
+ return ret;
+
+ /* UAMOR determines which bits of the AMR can be set from userspace. */
+ target->thread.amr = (new_amr & target->thread.uamor) |
+ (target->thread.amr & ~target->thread.uamor);
+
+ return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
/*
* These are our native regset flavors.
*/
@@ -1809,6 +1877,9 @@ enum powerpc_regset {
REGSET_EBB, /* EBB registers */
REGSET_PMR, /* Performance Monitor Registers */
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
};
static const struct user_regset native_regsets[] = {
@@ -1914,6 +1985,13 @@ static const struct user_regset native_regsets[] = {
.active = pmu_active, .get = pmu_get, .set = pmu_set
},
#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ .core_note_type = NT_PPC_PKEY, .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .get = pkey_get, .set = pkey_set
+ },
+#endif
};
static const struct user_regset_view user_ppc_native_view = {
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 24da91768133..af128ee67248 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
unsigned short maj;
unsigned short min;
- /* We only show online cpus: disable preempt (overzealous, I
- * knew) to prevent cpu going down. */
- preempt_disable();
- if (!cpu_online(cpu_id)) {
- preempt_enable();
- return 0;
- }
-
#ifdef CONFIG_SMP
pvr = per_cpu(cpu_pvr, cpu_id);
#else
@@ -356,8 +348,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#endif
seq_printf(m, "\n");
- preempt_enable();
-
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
show_cpuinfo_summary(m);
@@ -706,6 +696,30 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
+static int ppc_panic_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * If firmware-assisted dump has been registered then trigger
+ * firmware-assisted dump and let firmware handle everything else.
+ */
+ crash_fadump(NULL, ptr);
+ ppc_md.panic(ptr); /* May not return */
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_panic_block = {
+ .notifier_call = ppc_panic_event,
+ .priority = INT_MIN /* may not return; must be done last */
+};
+
+void __init setup_panic(void)
+{
+ if (!ppc_md.panic)
+ return;
+ atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+}
+
#ifdef CONFIG_CHECK_CACHE_COHERENCY
/*
* For platforms that have configurable cache-coherency. This function
@@ -850,6 +864,9 @@ void __init setup_arch(char **cmdline_p)
/* Probe the machine type, establish ppc_md. */
probe_machine();
+ /* Setup panic notifier if requested by the platform. */
+ setup_panic();
+
/*
* Configure ppc_md.power_save (ppc32 only, 64-bit machines do
* it from their respective probe() function.
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8f285d6a3db1..d1fa0e91f526 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -36,6 +36,7 @@
#include <linux/memory.h>
#include <linux/nmi.h>
+#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
@@ -66,6 +67,7 @@
#include <asm/livepatch.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
+#include <asm/hw_irq.h>
#include "setup.h"
@@ -187,6 +189,8 @@ static void __init fixup_boot_paca(void)
get_paca()->cpu_start = 1;
/* Allow percpu accesses to work until we setup percpu data */
get_paca()->data_offset = 0;
+ /* Mark interrupts disabled in PACA */
+ irq_soft_mask_set(IRQS_DISABLED);
}
static void __init configure_exceptions(void)
@@ -349,7 +353,7 @@ void __init early_setup(unsigned long dt_ptr)
void early_setup_secondary(void)
{
/* Mark interrupts disabled in PACA */
- get_paca()->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
@@ -805,3 +809,141 @@ static int __init disable_hardlockup_detector(void)
return 0;
}
early_initcall(disable_hardlockup_detector);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+bool rfi_flush;
+
+static int __init handle_no_rfi_flush(char *p)
+{
+ pr_info("rfi-flush: disabled on command line.");
+ no_rfi_flush = true;
+ return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see doco that says to use
+ * nopti we hijack that option here to also disable the RFI flush.
+ */
+static int __init handle_no_pti(char *p)
+{
+ pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+ handle_no_rfi_flush(NULL);
+ return 0;
+}
+early_param("nopti", handle_no_pti);
+
+static void do_nothing(void *unused)
+{
+ /*
+ * We don't need to do the flush explicitly, just enter+exit kernel is
+ * sufficient, the RFI exit handlers will do the right thing.
+ */
+}
+
+void rfi_flush_enable(bool enable)
+{
+ if (rfi_flush == enable)
+ return;
+
+ if (enable) {
+ do_rfi_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else
+ do_rfi_flush_fixups(L1D_FLUSH_NONE);
+
+ rfi_flush = enable;
+}
+
+static void init_fallback_flush(void)
+{
+ u64 l1d_size, limit;
+ int cpu;
+
+ l1d_size = ppc64_caches.l1d.size;
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
+
+ /*
+ * Align to L1d size, and size it at 2x L1d size, to catch possible
+ * hardware prefetch runoff. We don't have a recipe for load patterns to
+ * reliably avoid the prefetcher.
+ */
+ l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit));
+ memset(l1d_flush_fallback_area, 0, l1d_size * 2);
+
+ for_each_possible_cpu(cpu) {
+ /*
+ * The fallback flush is currently coded for 8-way
+ * associativity. Different associativity is possible, but it
+ * will be treated as 8-way and may not evict the lines as
+ * effectively.
+ *
+ * 128 byte lines are mandatory.
+ */
+ u64 c = l1d_size / 8;
+
+ paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca[cpu].l1d_flush_congruence = c;
+ paca[cpu].l1d_flush_sets = c / 128;
+ }
+}
+
+void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+ if (types & L1D_FLUSH_FALLBACK) {
+ pr_info("rfi-flush: Using fallback displacement flush\n");
+ init_fallback_flush();
+ }
+
+ if (types & L1D_FLUSH_ORI)
+ pr_info("rfi-flush: Using ori type flush\n");
+
+ if (types & L1D_FLUSH_MTTRIG)
+ pr_info("rfi-flush: Using mttrig type flush\n");
+
+ enabled_flush_types = types;
+
+ if (!no_rfi_flush)
+ rfi_flush_enable(enable);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int rfi_flush_set(void *data, u64 val)
+{
+ if (val == 1)
+ rfi_flush_enable(true);
+ else if (val == 0)
+ rfi_flush_enable(false);
+ else
+ return -EINVAL;
+
+ return 0;
+}
+
+static int rfi_flush_get(void *data, u64 *val)
+{
+ *val = rfi_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+static __init int rfi_flush_debugfs_init(void)
+{
+ debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ if (rfi_flush)
+ return sprintf(buf, "Mitigation: RFI Flush\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 9ffd73296f64..a30c6562ed66 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -111,12 +111,20 @@ static inline int save_general_regs(struct pt_regs *regs,
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ elf_greg_t64 softe = 0x1;
WARN_ON(!FULL_REGS(regs));
for (i = 0; i <= PT_RESULT; i ++) {
if (i == 14 && !FULL_REGS(regs))
i = 32;
+ if ( i == PT_SOFTE) {
+ if(__put_user((unsigned int)softe, &frame->mc_gregs[i]))
+ return -EFAULT;
+ else
+ continue;
+ }
if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
return -EFAULT;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 4b9ca3570344..2705fba544ad 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -110,6 +110,8 @@ static long setup_sigcontext(struct sigcontext __user *sc,
struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
long err = 0;
+ /* Force usr to alway see softe as 1 (interrupts enabled) */
+ unsigned long softe = 0x1;
BUG_ON(tsk != current);
@@ -169,6 +171,7 @@ static long setup_sigcontext(struct sigcontext __user *sc,
WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+ err |= __put_user(softe, &sc->gp_regs[PT_SOFTE]);
err |= __put_user(signr, &sc->signal);
err |= __put_user(handler, &sc->handler);
if (set != NULL)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index fe6f3a285455..a32823dcd9a4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -244,7 +244,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
void accumulate_stolen_time(void)
{
u64 sst, ust;
- u8 save_soft_enabled = local_paca->soft_enabled;
+ unsigned long save_irq_soft_mask = irq_soft_mask_return();
struct cpu_accounting_data *acct = &local_paca->accounting;
/* We are called early in the exception entry, before
@@ -253,7 +253,7 @@ void accumulate_stolen_time(void)
* needs to reflect that so various debug stuff doesn't
* complain
*/
- local_paca->soft_enabled = 0;
+ irq_soft_mask_set(IRQS_DISABLED);
sst = scan_dispatch_log(acct->starttime_user);
ust = scan_dispatch_log(acct->starttime);
@@ -261,7 +261,7 @@ void accumulate_stolen_time(void)
acct->utime -= ust;
acct->steal_time += ust + sst;
- local_paca->soft_enabled = save_soft_enabled;
+ irq_soft_mask_set(save_irq_soft_mask);
}
static inline u64 calculate_stolen_time(u64 stop_tb)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d139117d3339..122a3c883f4e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -20,6 +20,7 @@
#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
@@ -266,7 +267,9 @@ void user_single_step_siginfo(struct task_struct *tsk,
info->si_addr = (void __user *)regs->nip;
}
-void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+
+void _exception_pkey(int signr, struct pt_regs *regs, int code,
+ unsigned long addr, int key)
{
siginfo_t info;
const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \
@@ -289,13 +292,27 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
local_irq_enable();
current->thread.trap_nr = code;
+
+ /*
+ * Save all the pkey registers AMR/IAMR/UAMOR. Eg: Core dumps need
+ * to capture the content, if the task gets killed.
+ */
+ thread_pkey_regs_save(&current->thread);
+
memset(&info, 0, sizeof(info));
info.si_signo = signr;
info.si_code = code;
info.si_addr = (void __user *) addr;
+ info.si_pkey = key;
+
force_sig_info(signr, &info, current);
}
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+ _exception_pkey(signr, regs, code, addr, 0);
+}
+
void system_reset_exception(struct pt_regs *regs)
{
/*
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 51e4ec92ade1..6507138ebe10 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -132,6 +132,15 @@ SECTIONS
/* Read-only data */
RO_DATA(PAGE_SIZE)
+#ifdef CONFIG_PPC64
+ . = ALIGN(8);
+ __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+ __start___rfi_flush_fixup = .;
+ *(__rfi_flush_fixup)
+ __stop___rfi_flush_fixup = .;
+ }
+#endif
+
EXCEPTION_TABLE(0)
NOTES :kernel :notes
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3963baaba92d..0673230c0261 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -26,15 +26,45 @@
#include <asm/paca.h>
/*
- * The watchdog has a simple timer that runs on each CPU, once per timer
- * period. This is the heartbeat.
+ * The powerpc watchdog ensures that each CPU is able to service timers.
+ * The watchdog sets up a simple timer on each CPU to run once per timer
+ * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
+ * the heartbeat.
*
- * Then there are checks to see if the heartbeat has not triggered on a CPU
- * for the panic timeout period. Currently the watchdog only supports an
- * SMP check, so the heartbeat only turns on when we have 2 or more CPUs.
+ * Then there are two systems to check that the heartbeat is still running.
+ * The local soft-NMI, and the SMP checker.
*
- * This is not an NMI watchdog, but Linux uses that name for a generic
- * watchdog in some cases, so NMI gets used in some places.
+ * The soft-NMI checker can detect lockups on the local CPU. When interrupts
+ * are disabled with local_irq_disable(), platforms that use soft-masking
+ * can leave hardware interrupts enabled and handle them with a masked
+ * interrupt handler. The masked handler can send the timer interrupt to the
+ * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
+ * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
+ *
+ * The soft-NMI checker will compare the heartbeat timestamp for this CPU
+ * with the current time, and take action if the difference exceeds the
+ * watchdog threshold.
+ *
+ * The limitation of the soft-NMI watchdog is that it does not work when
+ * interrupts are hard disabled or otherwise not being serviced. This is
+ * solved by also having a SMP watchdog where all CPUs check all other
+ * CPUs heartbeat.
+ *
+ * The SMP checker can detect lockups on other CPUs. A gobal "pending"
+ * cpumask is kept, containing all CPUs which enable the watchdog. Each
+ * CPU clears their pending bit in their heartbeat timer. When the bitmask
+ * becomes empty, the last CPU to clear its pending bit updates a global
+ * timestamp and refills the pending bitmask.
+ *
+ * In the heartbeat timer, if any CPU notices that the global timestamp has
+ * not been updated for a period exceeding the watchdog threshold, then it
+ * means the CPU(s) with their bit still set in the pending mask have had
+ * their heartbeat stop, and action is taken.
+ *
+ * Some platforms implement true NMI IPIs, which can by used by the SMP
+ * watchdog to detect an unresponsive CPU and pull it out of its stuck
+ * state with the NMI IPI, to get crash/debug data from it. This way the
+ * SMP watchdog can detect hardware interrupts off lockups.
*/
static cpumask_t wd_cpus_enabled __read_mostly;
@@ -47,19 +77,7 @@ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
static DEFINE_PER_CPU(struct timer_list, wd_timer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
-/*
- * These are for the SMP checker. CPUs clear their pending bit in their
- * heartbeat. If the bitmask becomes empty, the time is noted and the
- * bitmask is refilled.
- *
- * All CPUs clear their bit in the pending mask every timer period.
- * Once all have cleared, the time is noted and the bits are reset.
- * If the time since all clear was greater than the panic timeout,
- * we can panic with the list of stuck CPUs.
- *
- * This will work best with NMI IPIs for crash code so the stuck CPUs
- * can be pulled out to get their backtraces.
- */
+/* SMP checker bits */
static unsigned long __wd_smp_lock;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index e61066bb6725..b11043b23c18 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -266,17 +266,19 @@ static void kvmppc_tb_resync_done(void)
* secondary threads to proceed.
* - All secondary threads will eventually call opal hmi handler on
* their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
*/
long kvmppc_realmode_hmi_handler(void)
{
- int ptid = local_paca->kvm_hstate.ptid;
bool resync_req;
- /* This is only called on primary thread. */
- BUG_ON(ptid != 0);
__this_cpu_inc(irq_stat.hmi_exceptions);
+ if (hmi_handle_debugtrig(NULL) >= 0)
+ return 1;
+
/*
* By now primary thread has already completed guest->host
* partition switch but haven't signaled secondaries yet.
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 26c11f678fbf..8888e625a999 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -42,7 +42,7 @@ static void *real_vmalloc_addr(void *x)
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
-static int global_invalidates(struct kvm *kvm, unsigned long flags)
+static int global_invalidates(struct kvm *kvm)
{
int global;
int cpu;
@@ -522,7 +522,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
rb = compute_tlbie_rb(v, pte_r, pte_index);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/*
* The reference (R) and change (C) bits in a HPT
* entry can be set by hardware at any time up until
@@ -572,7 +572,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
if (kvm_is_radix(kvm))
return H_FUNCTION;
- global = global_invalidates(kvm, 0);
+ global = global_invalidates(kvm);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0;
for (; i < 4; ++i) {
@@ -732,8 +732,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
HPTE_V_ABSENT);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
- true);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
/* Don't lose R/C bit updates done by hardware */
r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
hpte[1] = cpu_to_be64(r);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2659844784b8..7886b313d135 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
mtmsrd r0,1 /* clear RI in MSR */
mtsrr0 r5
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
kvmppc_call_hv_entry:
BEGIN_FTR_SECTION
@@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- RFI
+ RFI_TO_KERNEL
/* Virtual-mode return */
.Lvirt_return:
@@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r0, VCPU_GPR(R0)(r4)
ld r4, VCPU_GPR(R4)(r4)
-
- hrfid
+ HRFI_TO_GUEST
b .
secondary_too_late:
@@ -1909,16 +1908,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
bne 27f
bl kvmppc_realmode_hmi_handler
nop
+ cmpdi r3, 0
li r12, BOOK3S_INTERRUPT_HMI
/*
- * At this point kvmppc_realmode_hmi_handler would have resync-ed
- * the TB. Hence it is not required to subtract guest timebase
- * offset from timebase. So, skip it.
+ * At this point kvmppc_realmode_hmi_handler may have resync-ed
+ * the TB, and if it has, we must not subtract the guest timebase
+ * offset from the timebase. So, skip it.
*
* Also, do not call kvmppc_subcore_exit_guest() because it has
* been invoked as part of kvmppc_realmode_hmi_handler().
*/
- b 30f
+ beq 30f
27:
/* Subtract timebase offset from timebase */
@@ -3249,7 +3249,7 @@ kvmppc_bad_host_intr:
mfctr r4
#endif
mfxer r5
- lbz r6, PACASOFTIRQEN(r13)
+ lbz r6, PACAIRQSOFTMASK(r13)
std r3, _LINK(r1)
std r4, _CTR(r1)
std r5, _XER(r1)
@@ -3320,7 +3320,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r4, PACAKMSR(r13)
mtspr SPRN_SRR0, r3
mtspr SPRN_SRR1, r4
- rfid
+ RFI_TO_KERNEL
9: addi r3, r1, STACK_FRAME_OVERHEAD
bl kvmppc_bad_interrupt
b 9b
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 42a4b237df5f..34a5adeff084 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -46,6 +46,9 @@
#define FUNC(name) name
+#define RFI_TO_KERNEL RFI
+#define RFI_TO_GUEST RFI
+
.macro INTERRUPT_TRAMPOLINE intno
.global kvmppc_trampoline_\intno
@@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:
GET_SCRATCH0(r13)
/* And get back into the code */
- RFI
+ RFI_TO_KERNEL
#endif
/*
@@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
ori r5, r5, MSR_EE
mtsrr0 r7
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 2a2b96d53999..93a180ceefad 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -156,7 +156,7 @@ no_dcbz32_on:
PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
- RFI
+ RFI_TO_GUEST
kvmppc_handler_trampoline_enter_end:
@@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
beqa BOOK3S_INTERRUPT_DOORBELL
- RFI
+ RFI_TO_KERNEL
kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index bf457843e032..0d750d274c4e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
/* Return the per-cpu state for state saving/migration */
return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
- (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+ (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
+ (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
}
int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
@@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
/*
* Restore P and Q. If the interrupt was pending, we
- * force both P and Q, which will trigger a resend.
+ * force Q and !P, which will trigger a resend.
*
* That means that a guest that had both an interrupt
* pending (queued) and Q set will restore with only
@@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
* is perfectly fine as coalescing interrupts that haven't
* been presented yet is always allowed.
*/
- if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+ if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
state->old_p = true;
if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
state->old_q = true;
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 096d4e4d31e6..e0d881ab304e 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -23,19 +23,26 @@
#include <asm/code-patching.h>
#include <asm/setup.h>
-static int __patch_instruction(unsigned int *addr, unsigned int instr)
+static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
+ unsigned int *patch_addr)
{
int err;
- __put_user_size(instr, addr, 4, err);
+ __put_user_size(instr, patch_addr, 4, err);
if (err)
return err;
- asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
+ asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
+ "r" (exec_addr));
return 0;
}
+int raw_patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ return __patch_instruction(addr, instr, addr);
+}
+
#ifdef CONFIG_STRICT_KERNEL_RWX
static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
@@ -138,7 +145,7 @@ static inline int unmap_patch_area(unsigned long addr)
int patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
- unsigned int *dest = NULL;
+ unsigned int *patch_addr = NULL;
unsigned long flags;
unsigned long text_poke_addr;
unsigned long kaddr = (unsigned long)addr;
@@ -148,8 +155,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
* when text_poke_area is not ready, but we still need
* to allow patching. We just do the plain old patching
*/
- if (!this_cpu_read(*PTRRELOC(&text_poke_area)))
- return __patch_instruction(addr, instr);
+ if (!this_cpu_read(text_poke_area))
+ return raw_patch_instruction(addr, instr);
local_irq_save(flags);
@@ -159,17 +166,10 @@ int patch_instruction(unsigned int *addr, unsigned int instr)
goto out;
}
- dest = (unsigned int *)(text_poke_addr) +
+ patch_addr = (unsigned int *)(text_poke_addr) +
((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
- /*
- * We use __put_user_size so that we can handle faults while
- * writing to dest and return err to handle faults gracefully
- */
- __put_user_size(instr, dest, 4, err);
- if (!err)
- asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync"
- ::"r" (dest), "r"(addr));
+ __patch_instruction(addr, instr, patch_addr);
err = unmap_patch_area(text_poke_addr);
if (err)
@@ -184,7 +184,7 @@ out:
int patch_instruction(unsigned int *addr, unsigned int instr)
{
- return __patch_instruction(addr, instr);
+ return raw_patch_instruction(addr, instr);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 41cf5ae273cf..73697c4e3468 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -62,7 +62,7 @@ static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
}
}
- patch_instruction(dest, instr);
+ raw_patch_instruction(dest, instr);
return 0;
}
@@ -91,7 +91,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
}
for (; dest < end; dest++)
- patch_instruction(dest, PPC_INST_NOP);
+ raw_patch_instruction(dest, PPC_INST_NOP);
return 0;
}
@@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[3], *dest;
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___rfi_flush_fixup),
+ end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+ instrs[0] = 0x60000000; /* nop */
+ instrs[1] = 0x60000000; /* nop */
+ instrs[2] = 0x60000000; /* nop */
+
+ if (types & L1D_FLUSH_FALLBACK)
+ /* b .+16 to fallback flush */
+ instrs[0] = 0x48000010;
+
+ i = 0;
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ patch_instruction(dest, instrs[0]);
+ patch_instruction(dest + 1, instrs[1]);
+ patch_instruction(dest + 2, instrs[2]);
+ }
+
+ printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;
@@ -129,7 +170,7 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
for (; start < end; start++) {
dest = (void *)start + *start;
- patch_instruction(dest, PPC_INST_LWSYNC);
+ raw_patch_instruction(dest, PPC_INST_LWSYNC);
}
}
@@ -147,7 +188,7 @@ static void do_final_fixups(void)
length = (__end_interrupts - _stext) / sizeof(int);
while (length--) {
- patch_instruction(dest, *src);
+ raw_patch_instruction(dest, *src);
src++;
dest++;
}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 8d271bfe2d94..f06f3577d8d1 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -44,3 +44,4 @@ obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
+obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c07896c19517..866446cf2d9a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -107,7 +107,8 @@ static bool store_updates_sp(struct pt_regs *regs)
*/
static int
-__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code,
+ int pkey)
{
/*
* If we are in kernel mode, bail out with a SEGV, this will
@@ -117,17 +118,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
if (!user_mode(regs))
return SIGSEGV;
- _exception(SIGSEGV, regs, si_code, address);
+ _exception_pkey(SIGSEGV, regs, si_code, address, pkey);
return 0;
}
static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
- return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
+ return __bad_area_nosemaphore(regs, address, SEGV_MAPERR, 0);
}
-static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
+ int pkey)
{
struct mm_struct *mm = current->mm;
@@ -137,12 +139,23 @@ static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code)
*/
up_read(&mm->mmap_sem);
- return __bad_area_nosemaphore(regs, address, si_code);
+ return __bad_area_nosemaphore(regs, address, si_code, pkey);
}
static noinline int bad_area(struct pt_regs *regs, unsigned long address)
{
- return __bad_area(regs, address, SEGV_MAPERR);
+ return __bad_area(regs, address, SEGV_MAPERR, 0);
+}
+
+static int bad_key_fault_exception(struct pt_regs *regs, unsigned long address,
+ int pkey)
+{
+ return __bad_area_nosemaphore(regs, address, SEGV_PKUERR, pkey);
+}
+
+static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+{
+ return __bad_area(regs, address, SEGV_ACCERR, 0);
}
static int do_sigbus(struct pt_regs *regs, unsigned long address,
@@ -427,6 +440,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ if (error_code & DSISR_KEYFAULT)
+ return bad_key_fault_exception(regs, address,
+ get_mm_addr_key(mm, address));
+
/*
* We want to do this outside mmap_sem, because reading code around nip
* can result in fault, which will cause a deadlock when called with
@@ -490,7 +507,7 @@ retry:
good_area:
if (unlikely(access_error(is_write, is_exec, vma)))
- return bad_area(regs, address);
+ return bad_access(regs, address);
/*
* If for any reason at all we couldn't handle the fault,
@@ -498,6 +515,31 @@ good_area:
* the fault.
*/
fault = handle_mm_fault(vma, address, flags);
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * if the HPTE is not hashed, hardware will not detect
+ * a key fault. Lets check if we failed because of a
+ * software detected key fault.
+ */
+ if (unlikely(fault & VM_FAULT_SIGSEGV) &&
+ !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+ is_exec, 0)) {
+ /*
+ * The PGD-PDT...PMD-PTE tree may not have been fully setup.
+ * Hence we cannot walk the tree to locate the PTE, to locate
+ * the key. Hence let's use vma_pkey() to get the key; instead
+ * of get_mm_addr_key().
+ */
+ int pkey = vma_pkey(vma);
+
+ if (likely(pkey)) {
+ up_read(&mm->mmap_sem);
+ return bad_key_fault_exception(regs, address, pkey);
+ }
+ }
+#endif /* CONFIG_PPC_MEM_KEYS */
+
major |= fault & VM_FAULT_MAJOR;
/*
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 64e704eefad1..462c34e7b01d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -36,6 +36,7 @@
#include <linux/memblock.h>
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
+#include <linux/pkeys.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -232,6 +233,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
*/
rflags |= HPTE_R_M;
+ rflags |= pte_to_hpte_pkey_bits(pteflags);
return rflags;
}
@@ -1579,6 +1581,30 @@ out_exit:
local_irq_restore(flags);
}
+#ifdef CONFIG_PPC_MEM_KEYS
+/*
+ * Return the protection key associated with the given address and the
+ * mm_struct.
+ */
+u16 get_mm_addr_key(struct mm_struct *mm, unsigned long address)
+{
+ pte_t *ptep;
+ u16 pkey = 0;
+ unsigned long flags;
+
+ if (!mm || !mm->pgd)
+ return 0;
+
+ local_irq_save(flags);
+ ptep = find_linux_pte(mm->pgd, address, NULL, NULL);
+ if (ptep)
+ pkey = pte_to_pkey_bits(pte_val(READ_ONCE(*ptep)));
+ local_irq_restore(flags);
+
+ return pkey;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
static inline void tm_flush_hash_page(int local)
{
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index f5d1008f8574..876da2bc1796 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -752,7 +752,7 @@ void flush_dcache_icache_hugepage(struct page *page)
* So long as we atomically load page table pointers we are safe against teardown,
* we can follow the address down to the the page and take a ref on it.
* This function need to be called with interrupts disabled. We use this variant
- * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
+ * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
*/
pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
bool *is_thp, unsigned *hpage_shift)
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 59c0766ae4e0..929d9ef7083f 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
@@ -118,6 +119,7 @@ static int hash__init_new_context(struct mm_struct *mm)
subpage_prot_init_new_context(mm);
+ pkey_mm_init(mm);
return index;
}
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
new file mode 100644
index 000000000000..ba71c5481f42
--- /dev/null
+++ b/arch/powerpc/mm/pkeys.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#include <asm/mman.h>
+#include <asm/setup.h>
+#include <linux/pkeys.h>
+#include <linux/of_device.h>
+
+DEFINE_STATIC_KEY_TRUE(pkey_disabled);
+bool pkey_execute_disable_supported;
+int pkeys_total; /* Total pkeys as per device tree */
+bool pkeys_devtree_defined; /* pkey property exported by device tree */
+u32 initial_allocation_mask; /* Bits set for reserved keys */
+u64 pkey_amr_uamor_mask; /* Bits in AMR/UMOR not to be touched */
+u64 pkey_iamr_mask; /* Bits in AMR not to be touched */
+
+#define AMR_BITS_PER_PKEY 2
+#define AMR_RD_BIT 0x1UL
+#define AMR_WR_BIT 0x2UL
+#define IAMR_EX_BIT 0x1UL
+#define PKEY_REG_BITS (sizeof(u64)*8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
+
+static void scan_pkey_feature(void)
+{
+ u32 vals[2];
+ struct device_node *cpu;
+
+ cpu = of_find_node_by_type(NULL, "cpu");
+ if (!cpu)
+ return;
+
+ if (of_property_read_u32_array(cpu,
+ "ibm,processor-storage-keys", vals, 2))
+ return;
+
+ /*
+ * Since any pkey can be used for data or execute, we will just treat
+ * all keys as equal and track them as one entity.
+ */
+ pkeys_total = be32_to_cpu(vals[0]);
+ pkeys_devtree_defined = true;
+}
+
+static inline bool pkey_mmu_enabled(void)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ return pkeys_total;
+ else
+ return cpu_has_feature(CPU_FTR_PKEY);
+}
+
+int pkey_initialize(void)
+{
+ int os_reserved, i;
+
+ /*
+ * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
+ * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE.
+ * Ensure that the bits a distinct.
+ */
+ BUILD_BUG_ON(PKEY_DISABLE_EXECUTE &
+ (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ /*
+ * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous
+ * in the vmaflag. Make sure that is really the case.
+ */
+ BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) +
+ __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
+ != (sizeof(u64) * BITS_PER_BYTE));
+
+ /* scan the device tree for pkey feature */
+ scan_pkey_feature();
+
+ /*
+ * Let's assume 32 pkeys on P8 bare metal, if its not defined by device
+ * tree. We make this exception since skiboot forgot to expose this
+ * property on power8.
+ */
+ if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
+ cpu_has_feature(CPU_FTRS_POWER8))
+ pkeys_total = 32;
+
+ /*
+ * Adjust the upper limit, based on the number of bits supported by
+ * arch-neutral code.
+ */
+ pkeys_total = min_t(int, pkeys_total,
+ (ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT));
+
+ if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
+ static_branch_enable(&pkey_disabled);
+ else
+ static_branch_disable(&pkey_disabled);
+
+ if (static_branch_likely(&pkey_disabled))
+ return 0;
+
+ /*
+ * The device tree cannot be relied to indicate support for
+ * execute_disable support. Instead we use a PVR check.
+ */
+ if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
+ pkey_execute_disable_supported = false;
+ else
+ pkey_execute_disable_supported = true;
+
+#ifdef CONFIG_PPC_4K_PAGES
+ /*
+ * The OS can manage only 8 pkeys due to its inability to represent them
+ * in the Linux 4K PTE.
+ */
+ os_reserved = pkeys_total - 8;
+#else
+ os_reserved = 0;
+#endif
+ /*
+ * Bits are in LE format. NOTE: 1, 0 are reserved.
+ * key 0 is the default key, which allows read/write/execute.
+ * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
+ * programming note.
+ */
+ initial_allocation_mask = ~0x0;
+
+ /* register mask is in BE format */
+ pkey_amr_uamor_mask = ~0x0ul;
+ pkey_iamr_mask = ~0x0ul;
+
+ for (i = 2; i < (pkeys_total - os_reserved); i++) {
+ initial_allocation_mask &= ~(0x1 << i);
+ pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
+ pkey_iamr_mask &= ~(0x1ul << pkeyshift(i));
+ }
+ return 0;
+}
+
+arch_initcall(pkey_initialize);
+
+void pkey_mm_init(struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+ mm_pkey_allocation_map(mm) = initial_allocation_mask;
+ /* -1 means unallocated or invalid */
+ mm->context.execute_only_pkey = -1;
+}
+
+static inline u64 read_amr(void)
+{
+ return mfspr(SPRN_AMR);
+}
+
+static inline void write_amr(u64 value)
+{
+ mtspr(SPRN_AMR, value);
+}
+
+static inline u64 read_iamr(void)
+{
+ if (!likely(pkey_execute_disable_supported))
+ return 0x0UL;
+
+ return mfspr(SPRN_IAMR);
+}
+
+static inline void write_iamr(u64 value)
+{
+ if (!likely(pkey_execute_disable_supported))
+ return;
+
+ mtspr(SPRN_IAMR, value);
+}
+
+static inline u64 read_uamor(void)
+{
+ return mfspr(SPRN_UAMOR);
+}
+
+static inline void write_uamor(u64 value)
+{
+ mtspr(SPRN_UAMOR, value);
+}
+
+static bool is_pkey_enabled(int pkey)
+{
+ u64 uamor = read_uamor();
+ u64 pkey_bits = 0x3ul << pkeyshift(pkey);
+ u64 uamor_pkey_bits = (uamor & pkey_bits);
+
+ /*
+ * Both the bits in UAMOR corresponding to the key should be set or
+ * reset.
+ */
+ WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
+ return !!(uamor_pkey_bits);
+}
+
+static inline void init_amr(int pkey, u8 init_bits)
+{
+ u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
+ u64 old_amr = read_amr() & ~((u64)(0x3ul) << pkeyshift(pkey));
+
+ write_amr(old_amr | new_amr_bits);
+}
+
+static inline void init_iamr(int pkey, u8 init_bits)
+{
+ u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey));
+ u64 old_iamr = read_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey));
+
+ write_iamr(old_iamr | new_iamr_bits);
+}
+
+static void pkey_status_change(int pkey, bool enable)
+{
+ u64 old_uamor;
+
+ /* Reset the AMR and IAMR bits for this key */
+ init_amr(pkey, 0x0);
+ init_iamr(pkey, 0x0);
+
+ /* Enable/disable key */
+ old_uamor = read_uamor();
+ if (enable)
+ old_uamor |= (0x3ul << pkeyshift(pkey));
+ else
+ old_uamor &= ~(0x3ul << pkeyshift(pkey));
+ write_uamor(old_uamor);
+}
+
+void __arch_activate_pkey(int pkey)
+{
+ pkey_status_change(pkey, true);
+}
+
+void __arch_deactivate_pkey(int pkey)
+{
+ pkey_status_change(pkey, false);
+}
+
+/*
+ * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that
+ * specified in @init_val.
+ */
+int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ u64 new_amr_bits = 0x0ul;
+ u64 new_iamr_bits = 0x0ul;
+
+ if (!is_pkey_enabled(pkey))
+ return -EINVAL;
+
+ if (init_val & PKEY_DISABLE_EXECUTE) {
+ if (!pkey_execute_disable_supported)
+ return -EINVAL;
+ new_iamr_bits |= IAMR_EX_BIT;
+ }
+ init_iamr(pkey, new_iamr_bits);
+
+ /* Set the bits we need in AMR: */
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT;
+ else if (init_val & PKEY_DISABLE_WRITE)
+ new_amr_bits |= AMR_WR_BIT;
+
+ init_amr(pkey, new_amr_bits);
+ return 0;
+}
+
+void thread_pkey_regs_save(struct thread_struct *thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /*
+ * TODO: Skip saving registers if @thread hasn't used any keys yet.
+ */
+ thread->amr = read_amr();
+ thread->iamr = read_iamr();
+ thread->uamor = read_uamor();
+}
+
+void thread_pkey_regs_restore(struct thread_struct *new_thread,
+ struct thread_struct *old_thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /*
+ * TODO: Just set UAMOR to zero if @new_thread hasn't used any keys yet.
+ */
+ if (old_thread->amr != new_thread->amr)
+ write_amr(new_thread->amr);
+ if (old_thread->iamr != new_thread->iamr)
+ write_iamr(new_thread->iamr);
+ if (old_thread->uamor != new_thread->uamor)
+ write_uamor(new_thread->uamor);
+}
+
+void thread_pkey_regs_init(struct thread_struct *thread)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ write_amr(read_amr() & pkey_amr_uamor_mask);
+ write_iamr(read_iamr() & pkey_iamr_mask);
+ write_uamor(read_uamor() & pkey_amr_uamor_mask);
+}
+
+static inline bool pkey_allows_readwrite(int pkey)
+{
+ int pkey_shift = pkeyshift(pkey);
+
+ if (!is_pkey_enabled(pkey))
+ return true;
+
+ return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
+}
+
+int __execute_only_pkey(struct mm_struct *mm)
+{
+ bool need_to_set_mm_pkey = false;
+ int execute_only_pkey = mm->context.execute_only_pkey;
+ int ret;
+
+ /* Do we need to assign a pkey for mm's execute-only maps? */
+ if (execute_only_pkey == -1) {
+ /* Go allocate one to use, which might fail */
+ execute_only_pkey = mm_pkey_alloc(mm);
+ if (execute_only_pkey < 0)
+ return -1;
+ need_to_set_mm_pkey = true;
+ }
+
+ /*
+ * We do not want to go through the relatively costly dance to set AMR
+ * if we do not need to. Check it first and assume that if the
+ * execute-only pkey is readwrite-disabled than we do not have to set it
+ * ourselves.
+ */
+ if (!need_to_set_mm_pkey && !pkey_allows_readwrite(execute_only_pkey))
+ return execute_only_pkey;
+
+ /*
+ * Set up AMR so that it denies access for everything other than
+ * execution.
+ */
+ ret = __arch_set_user_pkey_access(current, execute_only_pkey,
+ PKEY_DISABLE_ACCESS |
+ PKEY_DISABLE_WRITE);
+ /*
+ * If the AMR-set operation failed somehow, just return 0 and
+ * effectively disable execute-only support.
+ */
+ if (ret) {
+ mm_pkey_free(mm, execute_only_pkey);
+ return -1;
+ }
+
+ /* We got one, store it and use it from here on out */
+ if (need_to_set_mm_pkey)
+ mm->context.execute_only_pkey = execute_only_pkey;
+ return execute_only_pkey;
+}
+
+static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma)
+{
+ /* Do this check first since the vm_flags should be hot */
+ if ((vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) != VM_EXEC)
+ return false;
+
+ return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey);
+}
+
+/*
+ * This should only be called for *plain* mprotect calls.
+ */
+int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
+ int pkey)
+{
+ /*
+ * If the currently associated pkey is execute-only, but the requested
+ * protection requires read or write, move it back to the default pkey.
+ */
+ if (vma_is_pkey_exec_only(vma) && (prot & (PROT_READ | PROT_WRITE)))
+ return 0;
+
+ /*
+ * The requested protection is execute-only. Hence let's use an
+ * execute-only pkey.
+ */
+ if (prot == PROT_EXEC) {
+ pkey = execute_only_pkey(vma->vm_mm);
+ if (pkey > 0)
+ return pkey;
+ }
+
+ /* Nothing to override. */
+ return vma_pkey(vma);
+}
+
+static bool pkey_access_permitted(int pkey, bool write, bool execute)
+{
+ int pkey_shift;
+ u64 amr;
+
+ if (!pkey)
+ return true;
+
+ if (!is_pkey_enabled(pkey))
+ return true;
+
+ pkey_shift = pkeyshift(pkey);
+ if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift)))
+ return true;
+
+ amr = read_amr(); /* Delay reading amr until absolutely needed */
+ return ((!write && !(amr & (AMR_RD_BIT << pkey_shift))) ||
+ (write && !(amr & (AMR_WR_BIT << pkey_shift))));
+}
+
+bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return true;
+
+ return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
+}
+
+/*
+ * We only want to enforce protection keys on the current thread because we
+ * effectively have no access to AMR/IAMR for other threads or any way to tell
+ * which AMR/IAMR in a threaded process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current mm, or if we are
+ * in a kernel thread.
+ */
+static inline bool vma_is_foreign(struct vm_area_struct *vma)
+{
+ if (!current->mm)
+ return true;
+
+ /* if it is not our ->mm, it has to be foreign */
+ if (current->mm != vma->vm_mm)
+ return true;
+
+ return false;
+}
+
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return true;
+ /*
+ * Do not enforce our key-permissions on a foreign vma.
+ */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return pkey_access_permitted(vma_pkey(vma), write, execute);
+}
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c
index 781532d7bc4d..f14a07c2fb90 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/subpage-prot.c
@@ -195,6 +195,9 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
unsigned long next, limit;
int err;
+ if (radix_enabled())
+ return -ENOENT;
+
/* Check parameters */
if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
addr >= mm->task_size || len >= mm->task_size ||
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 9e3da168d54c..f89bbd54ecec 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -322,7 +322,7 @@ static inline void perf_read_regs(struct pt_regs *regs)
*/
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
- return !regs->softe;
+ return (regs->softe & IRQS_DISABLED);
}
/*
@@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)
int ret;
__u64 target;
- if (is_kernel_addr(addr))
- return branch_target((unsigned int *)addr);
+ if (is_kernel_addr(addr)) {
+ if (probe_kernel_read(&instr, (void *)addr, sizeof(instr)))
+ return 0;
+
+ return branch_target(&instr);
+ }
/* Userspace: need copy instruction here then translate it */
pagefault_disable();
@@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count,
int n = 0;
struct perf_event *event;
- if (!is_software_event(group)) {
+ if (group->pmu->task_ctx_nr == perf_hw_context) {
if (n >= max_count)
return -1;
ctrs[n] = group;
@@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count,
events[n++] = group->hw.config;
}
list_for_each_entry(event, &group->sibling_list, group_entry) {
- if (!is_software_event(event) &&
+ if (event->pmu->task_ctx_nr == perf_hw_context &&
event->state != PERF_EVENT_STATE_OFF) {
if (n >= max_count)
return -1;
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0ead3cd73caa..d7532e7b9ab5 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -40,7 +40,6 @@ static struct imc_pmu *core_imc_pmu;
/* Thread IMC data structures and variables */
static DEFINE_PER_CPU(u64 *, thread_imc_mem);
-static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
@@ -117,17 +116,13 @@ static struct attribute *device_str_attr_create(const char *name, const char *st
return &attr->attr.attr;
}
-struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
- const char *unit, const char *prefix, u32 base)
+static int imc_parse_event(struct device_node *np, const char *scale,
+ const char *unit, const char *prefix,
+ u32 base, struct imc_events *event)
{
- struct imc_events *event;
const char *s;
u32 reg;
- event = kzalloc(sizeof(struct imc_events), GFP_KERNEL);
- if (!event)
- return NULL;
-
if (of_property_read_u32(np, "reg", &reg))
goto error;
/* Add the base_reg value to the "reg" */
@@ -158,14 +153,32 @@ struct imc_events *imc_parse_event(struct device_node *np, const char *scale,
goto error;
}
- return event;
+ return 0;
error:
kfree(event->unit);
kfree(event->scale);
kfree(event->name);
- kfree(event);
+ return -EINVAL;
+}
+
+/*
+ * imc_free_events: Function to cleanup the events list, having
+ * "nr_entries".
+ */
+static void imc_free_events(struct imc_events *events, int nr_entries)
+{
+ int i;
+
+ /* Nothing to clean, return */
+ if (!events)
+ return;
+ for (i = 0; i < nr_entries; i++) {
+ kfree(events[i].unit);
+ kfree(events[i].scale);
+ kfree(events[i].name);
+ }
- return NULL;
+ kfree(events);
}
/*
@@ -177,9 +190,8 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
struct attribute_group *attr_group;
struct attribute **attrs, *dev_str;
struct device_node *np, *pmu_events;
- struct imc_events *ev;
u32 handle, base_reg;
- int i=0, j=0, ct;
+ int i = 0, j = 0, ct, ret;
const char *prefix, *g_scale, *g_unit;
const char *ev_val_str, *ev_scale_str, *ev_unit_str;
@@ -217,15 +229,17 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
ct = 0;
/* Parse the events and update the struct */
for_each_child_of_node(pmu_events, np) {
- ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg);
- if (ev)
- pmu->events[ct++] = ev;
+ ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
+ if (!ret)
+ ct++;
}
/* Allocate memory for attribute group */
attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
- if (!attr_group)
+ if (!attr_group) {
+ imc_free_events(pmu->events, ct);
return -ENOMEM;
+ }
/*
* Allocate memory for attributes.
@@ -238,31 +252,31 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
if (!attrs) {
kfree(attr_group);
- kfree(pmu->events);
+ imc_free_events(pmu->events, ct);
return -ENOMEM;
}
attr_group->name = "events";
attr_group->attrs = attrs;
do {
- ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value);
- dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str);
+ ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+ dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
if (!dev_str)
continue;
attrs[j++] = dev_str;
- if (pmu->events[i]->scale) {
- ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale",pmu->events[i]->name);
- dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale);
+ if (pmu->events[i].scale) {
+ ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+ dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
if (!dev_str)
continue;
attrs[j++] = dev_str;
}
- if (pmu->events[i]->unit) {
- ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit",pmu->events[i]->name);
- dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit);
+ if (pmu->events[i].unit) {
+ ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+ dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
if (!dev_str)
continue;
@@ -273,7 +287,6 @@ static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
/* Save the event attribute */
pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
- kfree(pmu->events);
return 0;
}
@@ -310,6 +323,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)
return 0;
/*
+ * Check whether nest_imc is registered. We could end up here if the
+ * cpuhotplug callback registration fails. i.e, callback invokes the
+ * offline path for all successfully registered nodes. At this stage,
+ * nest_imc pmu will not be registered and we should return here.
+ *
+ * We return with a zero since this is not an offline failure. And
+ * cpuhp_setup_state() returns the actual failure reason to the caller,
+ * which in turn will call the cleanup routine.
+ */
+ if (!nest_pmus)
+ return 0;
+
+ /*
* Now that this cpu is one of the designated,
* find a next cpu a) which is online and b) in same chip.
*/
@@ -598,7 +624,8 @@ static int ppc_core_imc_cpu_online(unsigned int cpu)
static int ppc_core_imc_cpu_offline(unsigned int cpu)
{
- unsigned int ncpu, core_id;
+ unsigned int core_id;
+ int ncpu;
struct imc_pmu_ref *ref;
/*
@@ -1158,6 +1185,15 @@ static void cleanup_all_thread_imc_memory(void)
}
}
+/* Function to free the attr_groups which are dynamically allocated */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+ if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
+ kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
+ kfree(pmu_ptr);
+}
+
/*
* Common function to unregister cpu hotplug callback and
* free the memory.
@@ -1171,6 +1207,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
if (nest_pmus == 1) {
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
kfree(nest_imc_refc);
+ kfree(per_nest_pmu_arr);
}
if (nest_pmus > 0)
@@ -1189,14 +1226,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
-
- /* Only free the attr_groups which are dynamically allocated */
- if (pmu_ptr->attr_groups[IMC_EVENT_ATTR])
- kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
- kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
- kfree(pmu_ptr);
- kfree(per_nest_pmu_arr);
- return;
}
@@ -1245,8 +1274,10 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
GFP_KERNEL);
- if (!core_imc_refc)
+ if (!core_imc_refc) {
+ kfree(pmu_ptr->mem_info);
return -ENOMEM;
+ }
core_imc_pmu = pmu_ptr;
break;
@@ -1259,11 +1290,12 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
thread_imc_mem_size = pmu_ptr->counter_mem_size;
for_each_online_cpu(cpu) {
res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
- if (res)
+ if (res) {
+ cleanup_all_thread_imc_memory();
return res;
+ }
}
- thread_imc_pmu = pmu_ptr;
break;
default:
return -EINVAL;
@@ -1287,8 +1319,10 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
int ret;
ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
- if (ret)
- goto err_free;
+ if (ret) {
+ imc_common_mem_free(pmu_ptr);
+ return ret;
+ }
switch (pmu_ptr->domain) {
case IMC_DOMAIN_NEST:
@@ -1309,6 +1343,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
ret = nest_pmu_cpumask_init();
if (ret) {
mutex_unlock(&nest_init_lock);
+ kfree(nest_imc_refc);
+ kfree(per_nest_pmu_arr);
goto err_free;
}
}
@@ -1353,6 +1389,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
return 0;
err_free:
+ imc_common_mem_free(pmu_ptr);
imc_common_cpuhp_mem_free(pmu_ptr);
return ret;
}
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 5ffcdeb1eb17..94139135be9c 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -2375,8 +2375,8 @@ static int switch_log_sprint(struct spu_context *ctx, char *tbuf, int n)
p = ctx->switch_log->log + ctx->switch_log->tail % SWITCH_LOG_BUFSIZE;
- return snprintf(tbuf, n, "%u.%09u %d %u %u %llu\n",
- (unsigned int) p->tstamp.tv_sec,
+ return snprintf(tbuf, n, "%llu.%09u %d %u %u %llu\n",
+ (unsigned long long) p->tstamp.tv_sec,
(unsigned int) p->tstamp.tv_nsec,
p->spu_id,
(unsigned int) p->type,
@@ -2499,7 +2499,7 @@ void spu_switch_log_notify(struct spu *spu, struct spu_context *ctx,
struct switch_log_entry *p;
p = ctx->switch_log->log + ctx->switch_log->head;
- ktime_get_ts(&p->tstamp);
+ ktime_get_ts64(&p->tstamp);
p->timebase = get_tb();
p->spu_id = spu ? spu->number : -1;
p->type = type;
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 5e59f80e95db..5d85c689c2e9 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -69,7 +69,7 @@ struct switch_log {
unsigned long head;
unsigned long tail;
struct switch_log_entry {
- struct timespec tstamp;
+ struct timespec64 tstamp;
s32 spu_id;
u32 type;
u32 val;
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 465ea105b771..dd4c9b8b8a81 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -21,6 +21,78 @@
#include <asm/io.h>
#include <asm/imc-pmu.h>
#include <asm/cputhreads.h>
+#include <asm/debugfs.h>
+
+static struct dentry *imc_debugfs_parent;
+
+/* Helpers to export imc command and mode via debugfs */
+static int imc_mem_get(void *data, u64 *val)
+{
+ *val = cpu_to_be64(*(u64 *)data);
+ return 0;
+}
+
+static int imc_mem_set(void *data, u64 val)
+{
+ *(u64 *)data = cpu_to_be64(val);
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(fops_imc_x64, imc_mem_get, imc_mem_set, "0x%016llx\n");
+
+static struct dentry *imc_debugfs_create_x64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value)
+{
+ return debugfs_create_file_unsafe(name, mode, parent,
+ value, &fops_imc_x64);
+}
+
+/*
+ * export_imc_mode_and_cmd: Create a debugfs interface
+ * for imc_cmd and imc_mode
+ * for each node in the system.
+ * imc_mode and imc_cmd can be changed by echo into
+ * this interface.
+ */
+static void export_imc_mode_and_cmd(struct device_node *node,
+ struct imc_pmu *pmu_ptr)
+{
+ static u64 loc, *imc_mode_addr, *imc_cmd_addr;
+ int chip = 0, nid;
+ char mode[16], cmd[16];
+ u32 cb_offset;
+
+ imc_debugfs_parent = debugfs_create_dir("imc", powerpc_debugfs_root);
+
+ /*
+ * Return here, either because 'imc' directory already exists,
+ * Or failed to create a new one.
+ */
+ if (!imc_debugfs_parent)
+ return;
+
+ if (of_property_read_u32(node, "cb_offset", &cb_offset))
+ cb_offset = IMC_CNTL_BLK_OFFSET;
+
+ for_each_node(nid) {
+ loc = (u64)(pmu_ptr->mem_info[chip].vbase) + cb_offset;
+ imc_mode_addr = (u64 *)(loc + IMC_CNTL_BLK_MODE_OFFSET);
+ sprintf(mode, "imc_mode_%d", nid);
+ if (!imc_debugfs_create_x64(mode, 0600, imc_debugfs_parent,
+ imc_mode_addr))
+ goto err;
+
+ imc_cmd_addr = (u64 *)(loc + IMC_CNTL_BLK_CMD_OFFSET);
+ sprintf(cmd, "imc_cmd_%d", nid);
+ if (!imc_debugfs_create_x64(cmd, 0600, imc_debugfs_parent,
+ imc_cmd_addr))
+ goto err;
+ chip++;
+ }
+ return;
+
+err:
+ debugfs_remove_recursive(imc_debugfs_parent);
+}
/*
* imc_get_mem_addr_nest: Function to get nest counter memory region
@@ -65,6 +137,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
}
pmu_ptr->imc_counter_mmaped = true;
+ export_imc_mode_and_cmd(node, pmu_ptr);
kfree(base_addr_arr);
kfree(chipid_arr);
return 0;
@@ -213,6 +286,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
}
}
+ /* If none of the nest units are registered, remove debugfs interface */
+ if (pmu_count == 0)
+ debugfs_remove_recursive(imc_debugfs_parent);
+
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b9146d5aef81..1069f9cb273a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2572,7 +2572,6 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
unsigned long direct_table_size;
if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS) ||
- (window_size > memory_hotplug_max()) ||
!is_power_of_2(window_size))
return 0;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 1edfbc1e40f4..4fb21e17504a 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -37,13 +37,62 @@
#include <asm/kexec.h>
#include <asm/smp.h>
#include <asm/tm.h>
+#include <asm/setup.h>
#include "powernv.h"
+static void pnv_setup_rfi_flush(void)
+{
+ struct device_node *np, *fw_features;
+ enum l1d_flush_type type;
+ int enable;
+
+ /* Default to fallback in case fw-features are not available */
+ type = L1D_FLUSH_FALLBACK;
+ enable = 1;
+
+ np = of_find_node_by_name(NULL, "ibm,opal");
+ fw_features = of_get_child_by_name(np, "fw-features");
+ of_node_put(np);
+
+ if (fw_features) {
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_MTTRIG;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
+ if (np && of_property_read_bool(np, "enabled"))
+ type = L1D_FLUSH_ORI;
+
+ of_node_put(np);
+
+ /* Enable unless firmware says NOT to */
+ enable = 2;
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+
+ np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
+ if (np && of_property_read_bool(np, "disabled"))
+ enable--;
+
+ of_node_put(np);
+ of_node_put(fw_features);
+ }
+
+ setup_rfi_flush(type, enable > 0);
+}
+
static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
+ pnv_setup_rfi_flush();
+
/* Initialize SMP */
pnv_smp_init();
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 9dabea6e1443..6244bc849469 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void)
ps3_sys_manager_halt(); /* never returns */
}
+static void ps3_panic(char *str)
+{
+ DBG("%s:%d %s\n", __func__, __LINE__, str);
+
+ smp_send_stop();
+ printk("\n");
+ printk(" System does not reboot automatically.\n");
+ printk(" Please press POWER button.\n");
+ printk("\n");
+
+ while(1)
+ lv1_pause(1);
+}
+
#if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \
defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)
static void __init prealloc(struct ps3_prealloc *p)
@@ -255,6 +269,7 @@ define_machine(ps3) {
.probe = ps3_probe,
.setup_arch = ps3_setup_arch,
.init_IRQ = ps3_init_IRQ,
+ .panic = ps3_panic,
.get_boot_time = ps3_get_boot_time,
.set_dabr = ps3_set_dabr,
.calibrate_decr = ps3_calibrate_decr,
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 6e35780c5962..a0b20c03f078 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
static CLASS_ATTR_RW(dlpar);
-static int __init pseries_dlpar_init(void)
+int __init dlpar_workqueue_init(void)
{
+ if (pseries_hp_wq)
+ return 0;
+
pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
- WQ_UNBOUND, 1);
+ WQ_UNBOUND, 1);
+
+ return pseries_hp_wq ? 0 : -ENOMEM;
+}
+
+static int __init dlpar_sysfs_init(void)
+{
+ int rc;
+
+ rc = dlpar_workqueue_init();
+ if (rc)
+ return rc;
+
return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
}
-machine_device_initcall(pseries, pseries_dlpar_init);
+machine_device_initcall(pseries, dlpar_sysfs_init);
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index aac3ea2911b2..a3bbeb43689e 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -115,6 +115,7 @@ vec5_fw_features_table[] = {
{FW_FEATURE_TYPE1_AFFINITY, OV5_TYPE1_AFFINITY},
{FW_FEATURE_PRRN, OV5_PRRN},
{FW_FEATURE_DRMEM_V2, OV5_DRMEM_V2},
+ {FW_FEATURE_DRC_INFO, OV5_DRC_INFO},
};
static void __init fw_vec5_feature_init(const char *vec5, unsigned long len)
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 7e75101fa522..6df192f38f80 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -3,6 +3,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <asm/prom.h>
#include "of_helpers.h"
@@ -37,3 +38,62 @@ struct device_node *pseries_of_derive_parent(const char *path)
kfree(parent_path);
return parent ? parent : ERR_PTR(-EINVAL);
}
+
+
+/* Helper Routines to convert between drc_index to cpu numbers */
+
+int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
+ struct of_drc_info *data)
+{
+ const char *p;
+ const __be32 *p2;
+
+ if (!data)
+ return -EINVAL;
+
+ /* Get drc-type:encode-string */
+ p = data->drc_type = (char*) (*curval);
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-name-prefix:encode-string */
+ data->drc_name_prefix = (char *)p;
+ p = of_prop_next_string(*prop, p);
+ if (!p)
+ return -EINVAL;
+
+ /* Get drc-index-start:encode-int */
+ p2 = (const __be32 *)p;
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get drc-name-suffix-start:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get number-sequential-elements:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get sequential-increment:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
+ if (!p2)
+ return -EINVAL;
+
+ /* Get drc-power-domain:encode-int */
+ p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
+ if (!p2)
+ return -EINVAL;
+
+ /* Should now know end of current entry */
+ (*curval) = (void *)p2;
+ data->last_drc_index = data->drc_index_start +
+ ((data->num_sequential_elems - 1) * data->sequential_inc);
+
+ return 0;
+}
+EXPORT_SYMBOL(of_read_drc_info_cell);
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 4470a3194311..1ae1d9f4dbe9 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)
return CMO_PageSize;
}
+int dlpar_workqueue_init(void);
+
#endif /* _PSERIES_PSERIES_H */
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 35c891aabef0..6ed22127391b 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -22,6 +22,7 @@
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/firmware.h>
+#include <asm/prom.h>
#define MODULE_VERS "1.0"
@@ -38,26 +39,58 @@ static int sysfs_entries;
static u32 cpu_to_drc_index(int cpu)
{
struct device_node *dn = NULL;
- const int *indexes;
- int i;
+ int thread_index;
int rc = 1;
u32 ret = 0;
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
+
/* Convert logical cpu number to core number */
- i = cpu_core_index_of_thread(cpu);
- /*
- * The first element indexes[0] is the number of drc_indexes
- * returned in the list. Hence i+1 will get the drc_index
- * corresponding to core number i.
- */
- WARN_ON(i > indexes[0]);
- ret = indexes[i + 1];
+ thread_index = cpu_core_index_of_thread(cpu);
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+ struct property *info = NULL;
+ struct of_drc_info drc;
+ int j;
+ u32 num_set_entries;
+ const __be32 *value;
+
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info == NULL)
+ goto err_of_node_put;
+
+ value = of_prop_next_u32(info, NULL, &num_set_entries);
+ if (!value)
+ goto err_of_node_put;
+
+ for (j = 0; j < num_set_entries; j++) {
+
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ goto err;
+
+ if (thread_index < drc.last_drc_index)
+ break;
+ }
+
+ ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
+ } else {
+ const __be32 *indexes;
+
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+
+ /*
+ * The first element indexes[0] is the number of drc_indexes
+ * returned in the list. Hence thread_index+1 will get the
+ * drc_index corresponding to core number thread_index.
+ */
+ ret = indexes[thread_index + 1];
+ }
+
rc = 0;
err_of_node_put:
@@ -72,34 +105,71 @@ static int drc_index_to_cpu(u32 drc_index)
{
struct device_node *dn = NULL;
const int *indexes;
- int i, cpu = 0;
+ int thread_index = 0, cpu = 0;
int rc = 1;
dn = of_find_node_by_path("/cpus");
if (dn == NULL)
goto err;
- indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
- if (indexes == NULL)
- goto err_of_node_put;
- /*
- * First element in the array is the number of drc_indexes
- * returned. Search through the list to find the matching
- * drc_index and get the core number
- */
- for (i = 0; i < indexes[0]; i++) {
- if (indexes[i + 1] == drc_index)
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+ struct property *info = NULL;
+ struct of_drc_info drc;
+ int j;
+ u32 num_set_entries;
+ const __be32 *value;
+
+ info = of_find_property(dn, "ibm,drc-info", NULL);
+ if (info == NULL)
+ goto err_of_node_put;
+
+ value = of_prop_next_u32(info, NULL, &num_set_entries);
+ if (!value)
+ goto err_of_node_put;
+
+ for (j = 0; j < num_set_entries; j++) {
+
+ of_read_drc_info_cell(&info, &value, &drc);
+ if (strncmp(drc.drc_type, "CPU", 3))
+ goto err;
+
+ if (drc_index > drc.last_drc_index) {
+ cpu += drc.num_sequential_elems;
+ continue;
+ }
+ cpu += ((drc_index - drc.drc_index_start) /
+ drc.sequential_inc);
+
+ thread_index = cpu_first_thread_of_core(cpu);
+ rc = 0;
break;
+ }
+ } else {
+ unsigned long int i;
+
+ indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
+ if (indexes == NULL)
+ goto err_of_node_put;
+ /*
+ * First element in the array is the number of drc_indexes
+ * returned. Search through the list to find the matching
+ * drc_index and get the core number
+ */
+ for (i = 0; i < indexes[0]; i++) {
+ if (indexes[i + 1] == drc_index)
+ break;
+ }
+ /* Convert core number to logical cpu number */
+ thread_index = cpu_first_thread_of_core(i);
+ rc = 0;
}
- /* Convert core number to logical cpu number */
- cpu = cpu_first_thread_of_core(i);
- rc = 0;
err_of_node_put:
of_node_put(dn);
err:
if (rc)
printk(KERN_WARNING "drc_index_to_cpu(%d) failed", drc_index);
- return cpu;
+ return thread_index;
}
/*
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 4923ffe230cf..81d8614e7379 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)
/* Hotplug Events */
np = of_find_node_by_path("/event-sources/hot-plug-events");
if (np != NULL) {
- request_event_sources_irqs(np, ras_hotplug_interrupt,
+ if (dlpar_workqueue_init() == 0)
+ request_event_sources_irqs(np, ras_hotplug_interrupt,
"RAS_HOTPLUG");
of_node_put(np);
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 1d6e2de2445c..db76963e693d 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)
of_pci_check_probe_only();
}
+static void pseries_setup_rfi_flush(void)
+{
+ struct h_cpu_char_result result;
+ enum l1d_flush_type types;
+ bool enable;
+ long rc;
+
+ /* Enable by default */
+ enable = true;
+
+ rc = plpar_get_cpu_characteristics(&result);
+ if (rc == H_SUCCESS) {
+ types = L1D_FLUSH_NONE;
+
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+ types |= L1D_FLUSH_MTTRIG;
+ if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+ types |= L1D_FLUSH_ORI;
+
+ /* Use fallback if nothing set in hcall */
+ if (types == L1D_FLUSH_NONE)
+ types = L1D_FLUSH_FALLBACK;
+
+ if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+ enable = false;
+ } else {
+ /* Default to fallback if case hcall is not available */
+ types = L1D_FLUSH_FALLBACK;
+ }
+
+ setup_rfi_flush(types, enable);
+}
+
static void __init pSeries_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
@@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
+ pseries_setup_rfi_flush();
+
/* By default, only probe PCI (can be overridden by rtas_pci) */
pci_add_flags(PCI_PROBE_ONLY);
@@ -726,6 +761,7 @@ define_machine(pseries) {
.pcibios_fixup = pSeries_final_fixup,
.restart = rtas_restart,
.halt = rtas_halt,
+ .panic = rtas_os_term,
.get_boot_time = rtas_get_boot_time,
.get_rtc_time = rtas_get_rtc_time,
.set_rtc_time = rtas_set_rtc_time,
diff --git a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
index 1707bf04dec6..94278e8af192 100644
--- a/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
+++ b/arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c
@@ -56,17 +56,16 @@ static ssize_t fsl_timer_wakeup_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct timeval interval;
- int val = 0;
+ time64_t interval = 0;
mutex_lock(&sysfs_lock);
if (fsl_wakeup->timer) {
mpic_get_remain_time(fsl_wakeup->timer, &interval);
- val = interval.tv_sec + 1;
+ interval++;
}
mutex_unlock(&sysfs_lock);
- return sprintf(buf, "%d\n", val);
+ return sprintf(buf, "%lld\n", interval);
}
static ssize_t fsl_timer_wakeup_store(struct device *dev,
@@ -74,11 +73,10 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
const char *buf,
size_t count)
{
- struct timeval interval;
+ time64_t interval;
int ret;
- interval.tv_usec = 0;
- if (kstrtol(buf, 0, &interval.tv_sec))
+ if (kstrtoll(buf, 0, &interval))
return -EINVAL;
mutex_lock(&sysfs_lock);
@@ -89,13 +87,13 @@ static ssize_t fsl_timer_wakeup_store(struct device *dev,
fsl_wakeup->timer = NULL;
}
- if (!interval.tv_sec) {
+ if (!interval) {
mutex_unlock(&sysfs_lock);
return count;
}
fsl_wakeup->timer = mpic_request_timer(fsl_mpic_timer_irq,
- fsl_wakeup, &interval);
+ fsl_wakeup, interval);
if (!fsl_wakeup->timer) {
mutex_unlock(&sysfs_lock);
return -EINVAL;
diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c
index a418579591be..87e7c42777a8 100644
--- a/arch/powerpc/sysdev/mpic_timer.c
+++ b/arch/powerpc/sysdev/mpic_timer.c
@@ -47,9 +47,6 @@
#define MAX_TICKS_CASCADE (~0U)
#define TIMER_OFFSET(num) (1 << (TIMERS_PER_GROUP - 1 - num))
-/* tv_usec should be less than ONE_SECOND, otherwise use tv_sec */
-#define ONE_SECOND 1000000
-
struct timer_regs {
u32 gtccr;
u32 res0[3];
@@ -90,51 +87,23 @@ static struct cascade_priv cascade_timer[] = {
static LIST_HEAD(timer_group_list);
static void convert_ticks_to_time(struct timer_group_priv *priv,
- const u64 ticks, struct timeval *time)
+ const u64 ticks, time64_t *time)
{
- u64 tmp_sec;
-
- time->tv_sec = (__kernel_time_t)div_u64(ticks, priv->timerfreq);
- tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
-
- time->tv_usec = 0;
-
- if (tmp_sec <= ticks)
- time->tv_usec = (__kernel_suseconds_t)
- div_u64((ticks - tmp_sec) * 1000000, priv->timerfreq);
-
- return;
+ *time = (u64)div_u64(ticks, priv->timerfreq);
}
/* the time set by the user is converted to "ticks" */
static int convert_time_to_ticks(struct timer_group_priv *priv,
- const struct timeval *time, u64 *ticks)
+ time64_t time, u64 *ticks)
{
u64 max_value; /* prevent u64 overflow */
- u64 tmp = 0;
-
- u64 tmp_sec;
- u64 tmp_ms;
- u64 tmp_us;
max_value = div_u64(ULLONG_MAX, priv->timerfreq);
- if (time->tv_sec > max_value ||
- (time->tv_sec == max_value && time->tv_usec > 0))
+ if (time > max_value)
return -EINVAL;
- tmp_sec = (u64)time->tv_sec * (u64)priv->timerfreq;
- tmp += tmp_sec;
-
- tmp_ms = time->tv_usec / 1000;
- tmp_ms = div_u64((u64)tmp_ms * (u64)priv->timerfreq, 1000);
- tmp += tmp_ms;
-
- tmp_us = time->tv_usec % 1000;
- tmp_us = div_u64((u64)tmp_us * (u64)priv->timerfreq, 1000000);
- tmp += tmp_us;
-
- *ticks = tmp;
+ *ticks = (u64)time * (u64)priv->timerfreq;
return 0;
}
@@ -223,7 +192,7 @@ static struct mpic_timer *get_cascade_timer(struct timer_group_priv *priv,
return allocated_timer;
}
-static struct mpic_timer *get_timer(const struct timeval *time)
+static struct mpic_timer *get_timer(time64_t time)
{
struct timer_group_priv *priv;
struct mpic_timer *timer;
@@ -277,7 +246,7 @@ static struct mpic_timer *get_timer(const struct timeval *time)
* @handle: the timer to be started.
*
* It will do ->fn(->dev) callback from the hardware interrupt at
- * the ->timeval point in the future.
+ * the 'time64_t' point in the future.
*/
void mpic_start_timer(struct mpic_timer *handle)
{
@@ -319,7 +288,7 @@ EXPORT_SYMBOL(mpic_stop_timer);
*
* Query timer remaining time.
*/
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time)
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time)
{
struct timer_group_priv *priv = container_of(handle,
struct timer_group_priv, timer[handle->num]);
@@ -391,7 +360,7 @@ EXPORT_SYMBOL(mpic_free_timer);
* else "handle" on success.
*/
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time)
+ time64_t time)
{
struct mpic_timer *allocated_timer;
int ret;
@@ -399,11 +368,7 @@ struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
if (list_empty(&timer_group_list))
return NULL;
- if (!(time->tv_sec + time->tv_usec) ||
- time->tv_sec < 0 || time->tv_usec < 0)
- return NULL;
-
- if (time->tv_usec > ONE_SECOND)
+ if (time < 0)
return NULL;
allocated_timer = get_timer(time);
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 838ebdbfe4c5..40c06110821c 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
* EOI the source if it hasn't been disabled and hasn't
* been passed-through to a KVM guest
*/
- if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+ if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+ !(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
/*
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 1b2d8cb49abb..01d9a2dcff20 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs)
printf("kernel BUG at %s:%u!\n",
bug->file, bug->line);
#else
- printf("kernel BUG at %p!\n", (void *)bug->bug_addr);
+ printf("kernel BUG at %px!\n", (void *)bug->bug_addr);
#endif
#endif /* CONFIG_BUG */
}
@@ -1623,7 +1623,7 @@ static void excprint(struct pt_regs *fp)
printf(" current = 0x%lx\n", current);
#ifdef CONFIG_PPC64
printf(" paca = 0x%lx\t softe: %d\t irq_happened: 0x%02x\n",
- local_paca, local_paca->soft_enabled, local_paca->irq_happened);
+ local_paca, local_paca->irq_soft_mask, local_paca->irq_happened);
#endif
if (current) {
printf(" pid = %ld, comm = %s\n",
@@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu)
p = &paca[cpu];
- printf("paca for cpu 0x%x @ %p:\n", cpu, p);
+ printf("paca for cpu 0x%x @ %px:\n", cpu, p);
printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no");
printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? "yes" : "no");
@@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu)
DUMP(p, kernel_toc, "lx");
DUMP(p, kernelbase, "lx");
DUMP(p, kernel_msr, "lx");
- DUMP(p, emergency_sp, "p");
+ DUMP(p, emergency_sp, "px");
#ifdef CONFIG_PPC_BOOK3S_64
- DUMP(p, nmi_emergency_sp, "p");
- DUMP(p, mc_emergency_sp, "p");
+ DUMP(p, nmi_emergency_sp, "px");
+ DUMP(p, mc_emergency_sp, "px");
DUMP(p, in_nmi, "x");
DUMP(p, in_mce, "x");
DUMP(p, hmi_event_available, "x");
@@ -2375,23 +2375,27 @@ static void dump_one_paca(int cpu)
DUMP(p, slb_cache_ptr, "x");
for (i = 0; i < SLB_CACHE_ENTRIES; i++)
printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]);
+
+ DUMP(p, rfi_flush_fallback_area, "px");
+ DUMP(p, l1d_flush_congruence, "llx");
+ DUMP(p, l1d_flush_sets, "llx");
#endif
DUMP(p, dscr_default, "llx");
#ifdef CONFIG_PPC_BOOK3E
- DUMP(p, pgd, "p");
- DUMP(p, kernel_pgd, "p");
- DUMP(p, tcd_ptr, "p");
- DUMP(p, mc_kstack, "p");
- DUMP(p, crit_kstack, "p");
- DUMP(p, dbg_kstack, "p");
+ DUMP(p, pgd, "px");
+ DUMP(p, kernel_pgd, "px");
+ DUMP(p, tcd_ptr, "px");
+ DUMP(p, mc_kstack, "px");
+ DUMP(p, crit_kstack, "px");
+ DUMP(p, dbg_kstack, "px");
#endif
- DUMP(p, __current, "p");
+ DUMP(p, __current, "px");
DUMP(p, kstack, "lx");
printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));
DUMP(p, stab_rr, "lx");
DUMP(p, saved_r1, "lx");
DUMP(p, trap_save, "x");
- DUMP(p, soft_enabled, "x");
+ DUMP(p, irq_soft_mask, "x");
DUMP(p, irq_happened, "x");
DUMP(p, io_sync, "x");
DUMP(p, irq_work_pending, "x");
@@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu)
#endif
#ifdef CONFIG_PPC_POWERNV
- DUMP(p, core_idle_state_ptr, "p");
+ DUMP(p, core_idle_state_ptr, "px");
DUMP(p, thread_idle_state, "x");
DUMP(p, thread_mask, "x");
DUMP(p, subcore_sibling_mask, "x");
@@ -2945,7 +2949,7 @@ static void show_task(struct task_struct *tsk)
(tsk->exit_state & EXIT_DEAD) ? 'E' :
(tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
- printf("%p %016lx %6d %6d %c %2d %s\n", tsk,
+ printf("%px %016lx %6d %6d %c %2d %s\n", tsk,
tsk->thread.ksp,
tsk->pid, tsk->parent->pid,
state, task_thread_info(tsk)->cpu,
@@ -2988,7 +2992,7 @@ static void show_pte(unsigned long addr)
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
- printf("*** Error dumping pte for task %p\n", tsk);
+ printf("*** Error dumping pte for task %px\n", tsk);
return;
}
@@ -3074,7 +3078,7 @@ static void show_tasks(void)
if (setjmp(bus_error_jmp) != 0) {
catch_memory_errors = 0;
- printf("*** Error dumping task %p\n", tsk);
+ printf("*** Error dumping task %px\n", tsk);
return;
}
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 12a41b2753f0..7ff315ad3692 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
ctx->pid = NULL; /* Set in start work ioctl */
mutex_init(&ctx->mapping_lock);
ctx->mapping = NULL;
+ ctx->tidr = 0;
+ ctx->assign_tidr = false;
if (cxl_is_power8()) {
spin_lock_init(&ctx->sste_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..53149fbd780e 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -630,6 +630,9 @@ struct cxl_context {
struct list_head extra_irq_contexts;
struct mm_struct *mm;
+
+ u16 tidr;
+ bool assign_tidr;
};
struct cxl_irq_info;
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..30ccba436b3b 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task,
*/
attr->pid = mm->context.id;
mmput(mm);
+ attr->tid = task->thread.tidr;
} else {
attr->pid = 0;
+ attr->tid = 0;
}
- attr->tid = 0;
return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 76c0b0ca9388..93fd381f6484 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
* flags are set it's invalid
*/
if (work.reserved1 || work.reserved2 || work.reserved3 ||
- work.reserved4 || work.reserved5 || work.reserved6 ||
+ work.reserved4 || work.reserved5 ||
(work.flags & ~CXL_START_WORK_ALL)) {
rc = -EINVAL;
goto out;
@@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
rc = -EINVAL;
goto out;
}
+
if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
goto out;
if (work.flags & CXL_START_WORK_AMR)
amr = work.amr & mfspr(SPRN_UAMOR);
+ if (work.flags & CXL_START_WORK_TID)
+ ctx->assign_tidr = true;
+
ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
/*
@@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
goto out;
}
- ctx->status = STARTED;
rc = 0;
+ if (work.flags & CXL_START_WORK_TID) {
+ work.tid = ctx->tidr;
+ if (copy_to_user(uwork, &work, sizeof(work)))
+ rc = -EFAULT;
+ }
+
+ ctx->status = STARTED;
+
out:
mutex_unlock(&ctx->status_mutex);
return rc;
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 02b6b45b4c20..1b3d7c65ea3f 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -16,6 +16,7 @@
#include <linux/uaccess.h>
#include <linux/delay.h>
#include <asm/synch.h>
+#include <asm/switch_to.h>
#include <misc/cxl-base.h>
#include "cxl.h"
@@ -655,6 +656,7 @@ static void update_ivtes_directed(struct cxl_context *ctx)
static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
{
u32 pid;
+ int rc;
cxl_assign_psn_space(ctx);
@@ -673,7 +675,16 @@ static int process_element_entry_psl9(struct cxl_context *ctx, u64 wed, u64 amr)
pid = ctx->mm->context.id;
}
- ctx->elem->common.tid = 0;
+ /* Assign a unique TIDR (thread id) for the current thread */
+ if (!(ctx->tidr) && (ctx->assign_tidr)) {
+ rc = set_thread_tidr(current);
+ if (rc)
+ return -ENODEV;
+ ctx->tidr = current->thread.tidr;
+ pr_devel("%s: current tidr: %d\n", __func__, ctx->tidr);
+ }
+
+ ctx->elem->common.tid = cpu_to_be32(ctx->tidr);
ctx->elem->common.pid = cpu_to_be32(pid);
ctx->elem->sr = cpu_to_be64(calculate_sr(ctx));
diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
index a3449d717a99..fc01d7d807f3 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -27,6 +27,7 @@
#include <linux/mutex.h>
#include <asm/rtas.h>
#include <asm/vio.h>
+#include <linux/firmware.h>
#include "../pci.h"
#include "rpaphp.h"
@@ -44,15 +45,14 @@ static struct device_node *find_vio_slot_node(char *drc_name)
{
struct device_node *parent = of_find_node_by_name(NULL, "vdevice");
struct device_node *dn = NULL;
- char *name;
int rc;
if (!parent)
return NULL;
while ((dn = of_get_next_child(parent, dn))) {
- rc = rpaphp_get_drc_props(dn, NULL, &name, NULL, NULL);
- if ((rc == 0) && (!strcmp(drc_name, name)))
+ rc = rpaphp_check_drc_props(dn, drc_name, NULL);
+ if (rc == 0)
break;
}
@@ -64,15 +64,12 @@ static struct device_node *find_php_slot_pci_node(char *drc_name,
char *drc_type)
{
struct device_node *np = NULL;
- char *name;
- char *type;
int rc;
while ((np = of_find_node_by_name(np, "pci"))) {
- rc = rpaphp_get_drc_props(np, NULL, &name, &type, NULL);
+ rc = rpaphp_check_drc_props(np, drc_name, drc_type);
if (rc == 0)
- if (!strcmp(drc_name, name) && !strcmp(drc_type, type))
- break;
+ break;
}
return np;
diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c
index edb5d8a53020..b806314349cf 100644
--- a/drivers/pci/hotplug/rpadlpar_sysfs.c
+++ b/drivers/pci/hotplug/rpadlpar_sysfs.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
+#include "rpaphp.h"
#include "rpadlpar.h"
#include "../pci.h"
@@ -27,8 +28,6 @@
#define ADD_SLOT_ATTR_NAME add_slot
#define REMOVE_SLOT_ATTR_NAME remove_slot
-#define MAX_DRC_NAME_LEN 64
-
static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t nbytes)
{
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 7db024e68fe6..bdb844b01a3d 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -64,6 +64,10 @@ extern bool rpaphp_debug;
#define CONFIGURED 1
#define EMPTY 0
+/* DRC constants */
+
+#define MAX_DRC_NAME_LEN 64
+
/*
* struct slot - slot information for each *physical* slot
*/
@@ -91,8 +95,8 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state);
/* rpaphp_core.c */
int rpaphp_add_slot(struct device_node *dn);
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
- char **drc_name, char **drc_type, int *drc_power_domain);
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+ char *drc_type);
/* rpaphp_slot.c */
void dealloc_slot_struct(struct slot *slot);
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 1e29abaaea08..53902c7c38f2 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -30,6 +30,7 @@
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
+#include <asm/firmware.h>
#include <asm/eeh.h> /* for eeh_add_device() */
#include <asm/rtas.h> /* rtas_call */
#include <asm/pci-bridge.h> /* for pci_controller */
@@ -196,25 +197,21 @@ static int get_children_props(struct device_node *dn, const int **drc_indexes,
return 0;
}
-/* To get the DRC props describing the current node, first obtain it's
- * my-drc-index property. Next obtain the DRC list from it's parent. Use
- * the my-drc-index for correlation, and obtain the requested properties.
+
+/* Verify the existence of 'drc_name' and/or 'drc_type' within the
+ * current node. First obtain it's my-drc-index property. Next,
+ * obtain the DRC info from it's parent. Use the my-drc-index for
+ * correlation, and obtain/validate the requested properties.
*/
-int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
- char **drc_name, char **drc_type, int *drc_power_domain)
+
+static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
+ char *drc_type, unsigned int my_index)
{
+ char *name_tmp, *type_tmp;
const int *indexes, *names;
const int *types, *domains;
- const unsigned int *my_index;
- char *name_tmp, *type_tmp;
int i, rc;
- my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
- if (!my_index) {
- /* Node isn't DLPAR/hotplug capable */
- return -EINVAL;
- }
-
rc = get_children_props(dn->parent, &indexes, &names, &types, &domains);
if (rc < 0) {
return -EINVAL;
@@ -225,24 +222,84 @@ int rpaphp_get_drc_props(struct device_node *dn, int *drc_index,
/* Iterate through parent properties, looking for my-drc-index */
for (i = 0; i < be32_to_cpu(indexes[0]); i++) {
- if ((unsigned int) indexes[i + 1] == *my_index) {
- if (drc_name)
- *drc_name = name_tmp;
- if (drc_type)
- *drc_type = type_tmp;
- if (drc_index)
- *drc_index = be32_to_cpu(*my_index);
- if (drc_power_domain)
- *drc_power_domain = be32_to_cpu(domains[i+1]);
- return 0;
- }
+ if ((unsigned int) indexes[i + 1] == my_index)
+ break;
+
name_tmp += (strlen(name_tmp) + 1);
type_tmp += (strlen(type_tmp) + 1);
}
+ if (((drc_name == NULL) || (drc_name && !strcmp(drc_name, name_tmp))) &&
+ ((drc_type == NULL) || (drc_type && !strcmp(drc_type, type_tmp))))
+ return 0;
+
return -EINVAL;
}
-EXPORT_SYMBOL_GPL(rpaphp_get_drc_props);
+
+static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
+ char *drc_type, unsigned int my_index)
+{
+ struct property *info;
+ unsigned int entries;
+ struct of_drc_info drc;
+ const __be32 *value;
+ char cell_drc_name[MAX_DRC_NAME_LEN];
+ int j, fndit;
+
+ info = of_find_property(dn->parent, "ibm,drc-info", NULL);
+ if (info == NULL)
+ return -EINVAL;
+
+ value = of_prop_next_u32(info, NULL, &entries);
+ if (!value)
+ return -EINVAL;
+
+ for (j = 0; j < entries; j++) {
+ of_read_drc_info_cell(&info, &value, &drc);
+
+ /* Should now know end of current entry */
+
+ if (my_index > drc.last_drc_index)
+ continue;
+
+ fndit = 1;
+ break;
+ }
+ /* Found it */
+
+ if (fndit)
+ sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix,
+ my_index);
+
+ if (((drc_name == NULL) ||
+ (drc_name && !strcmp(drc_name, cell_drc_name))) &&
+ ((drc_type == NULL) ||
+ (drc_type && !strcmp(drc_type, drc.drc_type))))
+ return 0;
+
+ return -EINVAL;
+}
+
+int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
+ char *drc_type)
+{
+ const unsigned int *my_index;
+
+ my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+ if (!my_index) {
+ /* Node isn't DLPAR/hotplug capable */
+ return -EINVAL;
+ }
+
+ if (firmware_has_feature(FW_FEATURE_DRC_INFO))
+ return rpaphp_check_drc_props_v2(dn, drc_name, drc_type,
+ *my_index);
+ else
+ return rpaphp_check_drc_props_v1(dn, drc_name, drc_type,
+ *my_index);
+}
+EXPORT_SYMBOL_GPL(rpaphp_check_drc_props);
+
static int is_php_type(char *drc_type)
{
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index bb6836986200..3bf73fb58045 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -396,6 +396,7 @@ typedef struct elf64_shdr {
#define NT_PPC_TM_CTAR 0x10d /* TM checkpointed Target Address Register */
#define NT_PPC_TM_CPPR 0x10e /* TM checkpointed Program Priority Register */
#define NT_PPC_TM_CDSCR 0x10f /* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY 0x110 /* Memory Protection Keys registers */
#define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */
#define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */
#define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index 49e8fd08855a..56376d3907d8 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -20,20 +20,22 @@ struct cxl_ioctl_start_work {
__u64 work_element_descriptor;
__u64 amr;
__s16 num_interrupts;
- __s16 reserved1;
- __s32 reserved2;
+ __u16 tid;
+ __s32 reserved1;
+ __u64 reserved2;
__u64 reserved3;
__u64 reserved4;
__u64 reserved5;
- __u64 reserved6;
};
#define CXL_START_WORK_AMR 0x0000000000000001ULL
#define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL
#define CXL_START_WORK_ERR_FF 0x0000000000000004ULL
+#define CXL_START_WORK_TID 0x0000000000000008ULL
#define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\
CXL_START_WORK_NUM_IRQS |\
- CXL_START_WORK_ERR_FF)
+ CXL_START_WORK_ERR_FF |\
+ CXL_START_WORK_TID)
/* Possible modes that an afu can be in */