about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- arch/powerpc/Kconfig 12
-rw-r--r-- arch/powerpc/include/asm/book3s/64/pgtable.h 8
-rw-r--r-- arch/powerpc/include/asm/cputable.h 15
-rw-r--r-- arch/powerpc/include/asm/device.h 3
-rw-r--r-- arch/powerpc/include/asm/fixmap.h 2
-rw-r--r-- arch/powerpc/include/asm/hw_irq.h 11
-rw-r--r-- arch/powerpc/include/asm/kasan.h 9
-rw-r--r-- arch/powerpc/include/asm/kvm_book3s_uvmem.h 14
-rw-r--r-- arch/powerpc/include/asm/kvm_host.h 3
-rw-r--r-- arch/powerpc/include/asm/kvm_ppc.h 2
-rw-r--r-- arch/powerpc/include/asm/mce.h 7
-rw-r--r-- arch/powerpc/include/asm/mman.h 31
-rw-r--r-- arch/powerpc/include/asm/nohash/64/pgtable.h 2
-rw-r--r-- arch/powerpc/include/asm/perf_event.h 3
-rw-r--r-- arch/powerpc/include/asm/perf_event_server.h 5
-rw-r--r-- arch/powerpc/include/asm/reg.h 4
-rw-r--r-- arch/powerpc/include/asm/uaccess.h 3
-rw-r--r-- arch/powerpc/include/uapi/asm/mman.h 2
-rw-r--r-- arch/powerpc/include/uapi/asm/perf_regs.h 20
-rw-r--r-- arch/powerpc/kernel/cputable.c 22
-rw-r--r-- arch/powerpc/kernel/dt_cpu_ftrs.c 6
-rw-r--r-- arch/powerpc/kernel/entry_64.S 4
-rw-r--r-- arch/powerpc/kernel/iomap.c 28
-rw-r--r-- arch/powerpc/kernel/process.c 12
-rw-r--r-- arch/powerpc/kernel/setup-common.c 1
-rw-r--r-- arch/powerpc/kernel/syscalls/syscall.tbl 2
-rw-r--r-- arch/powerpc/kvm/book3s.c 3
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu_hv.c 8
-rw-r--r-- arch/powerpc/kvm/book3s_64_mmu_radix.c 4
-rw-r--r-- arch/powerpc/kvm/book3s_hv.c 26
-rw-r--r-- arch/powerpc/kvm/book3s_hv_nested.c 30
-rw-r--r-- arch/powerpc/kvm/book3s_hv_uvmem.c 700
-rw-r--r-- arch/powerpc/kvm/book3s_interrupts.S 56
-rw-r--r-- arch/powerpc/kvm/book3s_pr.c 9
-rw-r--r-- arch/powerpc/kvm/book3s_rtas.c 2
-rw-r--r-- arch/powerpc/kvm/booke.c 9
-rw-r--r-- arch/powerpc/kvm/booke_interrupts.S 9
-rw-r--r-- arch/powerpc/kvm/bookehv_interrupts.S 10
-rw-r--r-- arch/powerpc/kvm/e500_mmu_host.c 3
-rw-r--r-- arch/powerpc/kvm/powerpc.c 5
-rw-r--r-- arch/powerpc/mm/book3s32/mmu.c 9
-rw-r--r-- arch/powerpc/mm/book3s64/hash_utils.c 5
-rw-r--r-- arch/powerpc/mm/book3s64/pkeys.c 12
-rw-r--r-- arch/powerpc/mm/copro_fault.c 7
-rw-r--r-- arch/powerpc/mm/fault.c 11
-rw-r--r-- arch/powerpc/net/bpf_jit_comp.c 2
-rw-r--r-- arch/powerpc/perf/core-book3s.c 24
-rw-r--r-- arch/powerpc/perf/hv-24x7.c 11
-rw-r--r-- arch/powerpc/perf/imc-pmu.c 4
-rw-r--r-- arch/powerpc/perf/perf_regs.c 44
-rw-r--r-- arch/powerpc/perf/power10-pmu.c 6
-rw-r--r-- arch/powerpc/perf/power9-pmu.c 6
-rw-r--r-- arch/powerpc/platforms/Kconfig.cputype 2
-rw-r--r-- arch/powerpc/platforms/powernv/idle.c 2
-rw-r--r-- arch/powerpc/platforms/powernv/pci-ioda.c 2
-rw-r--r-- arch/powerpc/platforms/pseries/hotplug-cpu.c 18
-rw-r--r-- arch/powerpc/platforms/pseries/ras.c 1
57 files changed, 924 insertions, 347 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1f48bbfb3ce9..65bed1fdeaad 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -860,6 +860,18 @@ config PPC_SUBPAGE_PROT
If unsure, say N here.
+config PPC_PROT_SAO_LPAR
+ bool "Support PROT_SAO mappings in LPARs"
+ depends on PPC_BOOK3S_64
+ help
+ This option adds support for PROT_SAO mappings from userspace
+ inside LPARs on supported CPUs.
+
+ This may cause issues when performing guest migration from
+ a CPU that supports SAO to one that does not.
+
+ If unsure, say N here.
+
config PPC_COPRO_BASE
bool
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 6de56c3b33c4..495fc0ccb453 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -20,13 +20,9 @@
#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
#define _PAGE_PRIVILEGED 0x00008 /* kernel access only */
-
-#define _PAGE_CACHE_CTL 0x00030 /* Bits for the folowing cache modes */
- /* No bits set is normal cacheable memory */
- /* 0x00010 unused, is SAO bit on radix POWER9 */
+#define _PAGE_SAO 0x00010 /* Strong access order */
#define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */
#define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */
-
#define _PAGE_DIRTY 0x00080 /* C: page changed */
#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
/*
@@ -828,6 +824,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
return hash__set_pte_at(mm, addr, ptep, pte, percpu);
}
+#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t prot)
{
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index fdddb822d564..32a15dc49e8c 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -9,6 +9,11 @@
#ifndef __ASSEMBLY__
+/*
+ * Added to include __machine_check_early_realmode_* functions
+ */
+#include <asm/mce.h>
+
/* This structure can grow, it's real size is used by head.S code
* via the mkdefs mechanism.
*/
@@ -191,7 +196,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000)
#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000)
#define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000)
-// Free LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000)
#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000)
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000)
#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000)
@@ -436,7 +441,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | \
CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX )
@@ -445,7 +450,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
@@ -456,7 +461,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
@@ -474,7 +479,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index d8a0729cf754..219559d65864 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -29,9 +29,6 @@ struct dev_archdata {
struct iommu_table *iommu_table_base;
#endif
-#ifdef CONFIG_IOMMU_API
- void *iommu_domain;
-#endif
#ifdef CONFIG_PPC64
struct pci_dn *pci_data;
#endif
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 925cf89cbf4b..6bfc87915d5d 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -52,7 +52,7 @@ enum fixed_addresses {
FIX_HOLE,
/* reserve the top 128K for early debugging purposes */
FIX_EARLY_DEBUG_TOP = FIX_HOLE,
- FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128, PAGE_SIZE)/PAGE_SIZE)-1,
+ FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 3a0db7b0b46e..35060be09073 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -200,17 +200,14 @@ static inline bool arch_irqs_disabled(void)
#define powerpc_local_irq_pmu_save(flags) \
do { \
raw_local_irq_pmu_save(flags); \
- trace_hardirqs_off(); \
+ if (!raw_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
} while(0)
#define powerpc_local_irq_pmu_restore(flags) \
do { \
- if (raw_irqs_disabled_flags(flags)) { \
- raw_local_irq_pmu_restore(flags); \
- trace_hardirqs_off(); \
- } else { \
+ if (!raw_irqs_disabled_flags(flags)) \
trace_hardirqs_on(); \
- raw_local_irq_pmu_restore(flags); \
- } \
+ raw_local_irq_pmu_restore(flags); \
} while(0)
#else
#define powerpc_local_irq_pmu_save(flags) \
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index d635b96c7ea6..7355ed05e65e 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -15,11 +15,18 @@
#ifndef __ASSEMBLY__
#include <asm/page.h>
+#include <linux/sizes.h>
#define KASAN_SHADOW_SCALE_SHIFT 3
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_MODULES) && defined(CONFIG_STRICT_KERNEL_RWX)
+#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
+#else
+#define KASAN_KERN_START PAGE_OFFSET
+#endif
+
#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
- (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT))
+ (KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT))
#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
index 9cb7d8be2366..0a6319448cb6 100644
--- a/arch/powerpc/include/asm/kvm_book3s_uvmem.h
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -23,6 +23,10 @@ int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
struct kvm *kvm, bool skip_page_out);
+int kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *new);
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *old);
#else
static inline int kvmppc_uvmem_init(void)
{
@@ -82,5 +86,15 @@ static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
static inline void
kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
struct kvm *kvm, bool skip_page_out) { }
+
+static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *new)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *old) { }
+
#endif /* CONFIG_PPC_UV */
#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e020d269416d..10ded83414de 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -58,7 +58,8 @@
#define KVM_ARCH_WANT_MMU_NOTIFIER
extern int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end,
+ unsigned flags);
extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ccf66b3a4c1d..0a056c64c317 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -59,7 +59,7 @@ enum xlate_readwrite {
};
extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
-extern int __kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
extern void kvmppc_handler_highmem(void);
extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index adf2cda67f9a..89aa8248a57d 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -210,6 +210,9 @@ struct mce_error_info {
#define MCE_EVENT_RELEASE true
#define MCE_EVENT_DONTRELEASE false
+struct pt_regs;
+struct notifier_block;
+
extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t nip,
uint64_t addr, uint64_t phys_addr);
@@ -225,5 +228,9 @@ int mce_register_notifier(struct notifier_block *nb);
int mce_unregister_notifier(struct notifier_block *nb);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
+long __machine_check_early_realmode_p7(struct pt_regs *regs);
+long __machine_check_early_realmode_p8(struct pt_regs *regs);
+long __machine_check_early_realmode_p9(struct pt_regs *regs);
+long __machine_check_early_realmode_p10(struct pt_regs *regs);
#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 7c07728af300..7cb6d18f5cd6 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,20 +13,43 @@
#include <linux/pkeys.h>
#include <asm/cpu_has_feature.h>
-#ifdef CONFIG_PPC_MEM_KEYS
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey)
{
- return pkey_to_vmflag_bits(pkey);
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
+#else
+ return ((prot & PROT_SAO) ? VM_SAO : 0);
+#endif
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
{
- return __pgprot(vmflag_to_pte_pkey_bits(vm_flags));
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (vm_flags & VM_SAO) ?
+ __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
+ __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
+#else
+ return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#endif
}
#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
-#endif
+
+static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
+{
+ if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
+ return false;
+ if (prot & PROT_SAO) {
+ if (!cpu_has_feature(CPU_FTR_SAO))
+ return false;
+ if (firmware_has_feature(FW_FEATURE_LPAR) &&
+ !IS_ENABLED(CONFIG_PPC_PROT_SAO_LPAR))
+ return false;
+ }
+ return true;
+}
+#define arch_validate_prot arch_validate_prot
#endif /* CONFIG_PPC64 */
#endif /* _ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 59ee9fa4ae09..6cb8aa357191 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -82,6 +82,8 @@
*/
#include <asm/nohash/pte-book3e.h>
+#define _PAGE_SAO 0
+
#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
/*
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 1e8b2e1ec1db..daec64d41b44 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -40,4 +40,7 @@ static inline bool is_sier_available(void) { return false; }
/* To support perf_regs sier update */
extern bool is_sier_available(void);
+/* To define perf extended regs mask value */
+extern u64 PERF_REG_EXTENDED_MASK;
+#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK
#endif
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 86c9eb064b22..f6acabb6c9be 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -62,6 +62,11 @@ struct power_pmu {
int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
+ /*
+ * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
+ * the pmu supports extended perf regs capability
+ */
+ int capabilities;
};
/*
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 41419f1fc00f..88fb88491fe9 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -474,7 +474,8 @@
#ifndef SPRN_LPID
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
-#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
+#define LPID_RSVD_POWER7 0x3ff /* Reserved LPID for partn switching */
+#define LPID_RSVD 0xfff /* Reserved LPID for partn switching */
#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
@@ -1362,6 +1363,7 @@
#define PVR_ARCH_206p 0x0f100003
#define PVR_ARCH_207 0x0f000004
#define PVR_ARCH_300 0x0f000005
+#define PVR_ARCH_31 0x0f000006
/* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 64c04ab09112..00699903f1ef 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs)
set_thread_flag(TIF_FSCHECK);
}
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
#define user_addr_max() (get_fs().seg)
#ifdef __powerpc64__
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 3a700351feca..c0c737215b00 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -11,7 +11,7 @@
#include <asm-generic/mman-common.h>
-#define PROT_SAO 0x10 /* Unsupported since v5.9 */
+#define PROT_SAO 0x10 /* Strong Access Ordering */
#define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */
#define MAP_NORESERVE 0x40 /* don't reserve swap pages */
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h
index f599064dd8dc..bdf5f10f8b9f 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -48,6 +48,24 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
- PERF_REG_POWERPC_MAX,
+ /* Extended registers */
+ PERF_REG_POWERPC_MMCR0,
+ PERF_REG_POWERPC_MMCR1,
+ PERF_REG_POWERPC_MMCR2,
+ PERF_REG_POWERPC_MMCR3,
+ PERF_REG_POWERPC_SIER2,
+ PERF_REG_POWERPC_SIER3,
+ /* Max regs without the extended regs */
+ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};
+
+#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
+#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
+#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
+
+#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
+#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 3d406a9626e8..2aa89c6b2896 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -72,9 +72,6 @@ extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power9(void);
extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_power10(void);
-extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_E500)
extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -542,6 +539,25 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
+ { /* Power10 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00800000,
+ .cpu_name = "POWER10 (raw)",
+ .cpu_features = CPU_FTRS_POWER10,
+ .cpu_user_features = COMMON_USER_POWER10,
+ .cpu_user_features2 = COMMON_USER2_POWER10,
+ .mmu_features = MMU_FTRS_POWER10,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power10",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .machine_check_early = __machine_check_early_realmode_p10,
+ .platform = "power10",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 6f8c0c6b937a..f204ad79b6b5 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -64,10 +64,6 @@ struct dt_cpu_feature {
* Set up the base CPU
*/
-extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p10(struct pt_regs *regs);
-
static int hv_mode;
static struct {
@@ -657,7 +653,7 @@ static struct dt_cpu_feature_match __initdata
{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
{"processor-utilization-of-resources-register", feat_enable_purr, 0},
{"no-execute", feat_enable, 0},
- /* strong-access-ordering is unused */
+ {"strong-access-ordering", feat_enable, CPU_FTR_SAO},
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
{"coprocessor-icswx", feat_enable, 0},
{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 33a42e42c56f..733e40eba4eb 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -113,6 +113,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
ld r11,exception_marker@toc(r2)
std r11,-16(r10) /* "regshere" marker */
+BEGIN_FTR_SECTION
+ HMT_MEDIUM
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
/*
* RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
* would clobber syscall parameters. Also we always enter with IRQs
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 5ac84efc6ede..9fe4fb3b08aa 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -15,23 +15,23 @@
* Here comes the ppc64 implementation of the IOMAP
* interfaces.
*/
-unsigned int ioread8(void __iomem *addr)
+unsigned int ioread8(const void __iomem *addr)
{
return readb(addr);
}
-unsigned int ioread16(void __iomem *addr)
+unsigned int ioread16(const void __iomem *addr)
{
return readw(addr);
}
-unsigned int ioread16be(void __iomem *addr)
+unsigned int ioread16be(const void __iomem *addr)
{
return readw_be(addr);
}
-unsigned int ioread32(void __iomem *addr)
+unsigned int ioread32(const void __iomem *addr)
{
return readl(addr);
}
-unsigned int ioread32be(void __iomem *addr)
+unsigned int ioread32be(const void __iomem *addr)
{
return readl_be(addr);
}
@@ -41,27 +41,27 @@ EXPORT_SYMBOL(ioread16be);
EXPORT_SYMBOL(ioread32);
EXPORT_SYMBOL(ioread32be);
#ifdef __powerpc64__
-u64 ioread64(void __iomem *addr)
+u64 ioread64(const void __iomem *addr)
{
return readq(addr);
}
-u64 ioread64_lo_hi(void __iomem *addr)
+u64 ioread64_lo_hi(const void __iomem *addr)
{
return readq(addr);
}
-u64 ioread64_hi_lo(void __iomem *addr)
+u64 ioread64_hi_lo(const void __iomem *addr)
{
return readq(addr);
}
-u64 ioread64be(void __iomem *addr)
+u64 ioread64be(const void __iomem *addr)
{
return readq_be(addr);
}
-u64 ioread64be_lo_hi(void __iomem *addr)
+u64 ioread64be_lo_hi(const void __iomem *addr)
{
return readq_be(addr);
}
-u64 ioread64be_hi_lo(void __iomem *addr)
+u64 ioread64be_hi_lo(const void __iomem *addr)
{
return readq_be(addr);
}
@@ -139,15 +139,15 @@ EXPORT_SYMBOL(iowrite64be_hi_lo);
* FIXME! We could make these do EEH handling if we really
* wanted. Not clear if we do.
*/
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread8_rep(const void __iomem *addr, void *dst, unsigned long count)
{
readsb(addr, dst, count);
}
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread16_rep(const void __iomem *addr, void *dst, unsigned long count)
{
readsw(addr, dst, count);
}
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+void ioread32_rep(const void __iomem *addr, void *dst, unsigned long count)
{
readsl(addr, dst, count);
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 016bd831908e..73a57043ee66 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -548,7 +548,7 @@ void notrace restore_math(struct pt_regs *regs)
* are live for the user thread).
*/
if ((!(msr & MSR_FP)) && should_restore_fp())
- new_msr |= MSR_FP | current->thread.fpexc_mode;
+ new_msr |= MSR_FP;
if ((!(msr & MSR_VEC)) && should_restore_altivec())
new_msr |= MSR_VEC;
@@ -559,11 +559,17 @@ void notrace restore_math(struct pt_regs *regs)
}
if (new_msr) {
+ unsigned long fpexc_mode = 0;
+
msr_check_and_set(new_msr);
- if (new_msr & MSR_FP)
+ if (new_msr & MSR_FP) {
do_restore_fp();
+ // This also covers VSX, because VSX implies FP
+ fpexc_mode = current->thread.fpexc_mode;
+ }
+
if (new_msr & MSR_VEC)
do_restore_altivec();
@@ -572,7 +578,7 @@ void notrace restore_math(struct pt_regs *regs)
msr_check_and_clear(new_msr);
- regs->msr |= new_msr;
+ regs->msr |= new_msr | fpexc_mode;
}
}
#endif
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b198b0ff25bc..808ec9fab605 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -311,6 +311,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
min = pvr & 0xFF;
break;
case 0x004e: /* POWER9 bits 12-15 give chip type */
+ case 0x0080: /* POWER10 bit 12 gives SMT8/4 */
maj = (pvr >> 8) & 0x0F;
min = pvr & 0xFF;
break;
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index be9f74546068..c2d737ff2e7b 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -197,7 +197,7 @@
146 common writev sys_writev compat_sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 nospu _sysctl sys_sysctl compat_sys_sysctl
+149 nospu _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 41fedec69ac3..49db50d1db04 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -834,7 +834,8 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7c5a1812a1c3..38ea396a23d6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -260,11 +260,15 @@ int kvmppc_mmu_hv_init(void)
if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL;
- /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
host_lpid = 0;
if (cpu_has_feature(CPU_FTR_HVMODE))
host_lpid = mfspr(SPRN_LPID);
- rsvd_lpid = LPID_RSVD;
+
+ /* POWER8 and above have 12-bit LPIDs (10-bit in POWER7) */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ rsvd_lpid = LPID_RSVD;
+ else
+ rsvd_lpid = LPID_RSVD_POWER7;
kvmppc_init_lpid(rsvd_lpid + 1);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 777aa5625d5f..22a677b18695 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -161,7 +161,9 @@ int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
return -EINVAL;
/* Read the entry from guest memory */
addr = base + (index * sizeof(rpte));
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
if (ret) {
if (pte_ret_p)
*pte_ret_p = addr;
@@ -237,7 +239,9 @@ int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Read the table to find the root of the radix tree */
ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
if (ret)
return ret;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0f83f39a2bd2..4ba06a2a306c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -343,13 +343,18 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
vcpu->arch.pvr = pvr;
}
+/* Dummy value used in computing PCR value below */
+#define PCR_ARCH_31 (PCR_ARCH_300 << 1)
+
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
/* We can (emulate) our own architecture version and anything older */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ host_pcr_bit = PCR_ARCH_31;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
host_pcr_bit = PCR_ARCH_300;
else if (cpu_has_feature(CPU_FTR_ARCH_207S))
host_pcr_bit = PCR_ARCH_207;
@@ -375,6 +380,9 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
case PVR_ARCH_300:
guest_pcr_bit = PCR_ARCH_300;
break;
+ case PVR_ARCH_31:
+ guest_pcr_bit = PCR_ARCH_31;
+ break;
default:
return -EINVAL;
}
@@ -2355,7 +2363,7 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
* to trap and then we emulate them.
*/
vcpu->arch.hfscr = HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
- HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP;
+ HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX;
if (cpu_has_feature(CPU_FTR_HVMODE)) {
vcpu->arch.hfscr &= mfspr(SPRN_HFSCR);
if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
@@ -4552,16 +4560,14 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
switch (change) {
case KVM_MR_CREATE:
- if (kvmppc_uvmem_slot_init(kvm, new))
- return;
- uv_register_mem_slot(kvm->arch.lpid,
- new->base_gfn << PAGE_SHIFT,
- new->npages * PAGE_SIZE,
- 0, new->id);
+ /*
+ * @TODO kvmppc_uvmem_memslot_create() can fail and
+ * return error. Fix this.
+ */
+ kvmppc_uvmem_memslot_create(kvm, new);
break;
case KVM_MR_DELETE:
- uv_unregister_mem_slot(kvm->arch.lpid, old->id);
- kvmppc_uvmem_slot_free(kvm, old);
+ kvmppc_uvmem_memslot_delete(kvm, old);
break;
default:
/* TODO: Handle KVM_MR_MOVE */
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 2c849a65db77..6822d23a2da4 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -233,20 +233,21 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
/* copy parameters in */
hv_ptr = kvmppc_get_gpr(vcpu, 4);
+ regs_ptr = kvmppc_get_gpr(vcpu, 5);
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state));
+ sizeof(struct hv_guest_state)) ||
+ kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct pt_regs));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_PARAMETER;
+
if (kvmppc_need_byteswap(vcpu))
byteswap_hv_regs(&l2_hv);
if (l2_hv.version != HV_GUEST_STATE_VERSION)
return H_P2;
- regs_ptr = kvmppc_get_gpr(vcpu, 5);
- err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
- sizeof(struct pt_regs));
- if (err)
- return H_PARAMETER;
if (kvmppc_need_byteswap(vcpu))
byteswap_pt_regs(&l2_regs);
if (l2_hv.vcpu_token >= NR_CPUS)
@@ -323,12 +324,12 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
byteswap_hv_regs(&l2_hv);
byteswap_pt_regs(&l2_regs);
}
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state));
- if (err)
- return H_AUTHORITY;
- err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
+ sizeof(struct hv_guest_state)) ||
+ kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs,
sizeof(struct pt_regs));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (err)
return H_AUTHORITY;
@@ -508,12 +509,16 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
goto not_found;
/* Write what was loaded into our buffer back to the L1 guest */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto not_found;
} else {
/* Load the data to be stored from the L1 guest into our buf */
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto not_found;
@@ -548,9 +553,12 @@ static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
ret = -EFAULT;
ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
- if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
+ if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
ret = kvm_read_guest(kvm, ptbl_addr,
&ptbl_entry, sizeof(ptbl_entry));
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ }
if (ret) {
gp->l1_gr_to_hr = 0;
gp->process_table = 0;
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 6850bd04bcb9..7705d5557239 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -93,12 +93,133 @@
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_uvmem.h>
static struct dev_pagemap kvmppc_uvmem_pgmap;
static unsigned long *kvmppc_uvmem_bitmap;
static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
-#define KVMPPC_UVMEM_PFN (1UL << 63)
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ * a Secure VM, the contents of the GFN is not accessible
+ * to the Hypervisor. This GFN can be backed by a secure-PFN,
+ * or can be backed by a normal-PFN with contents encrypted.
+ * The former is true when the GFN is paged-in into the
+ * ultravisor. The latter is true when the GFN is paged-out
+ * of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with a
+ * secure VM. The contents of the GFN is accessible to
+ * Hypervisor. This GFN is backed by a normal-PFN and its
+ * content is un-encrypted.
+ *
+ * (c) Normal - The GFN is normal. The GFN is associated with
+ * a normal VM. The contents of the GFN is accessible to
+ * the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM: A VM whose contents are always accessible to
+ * the hypervisor. All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ * hypervisor without the VM's consent. Its GFNs are
+ * either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ * The transition starts on successful return of
+ * H_SVM_INIT_START, and ends on successful return
+ * of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ * in any of the three states; i.e Secure-GFN, Shared-GFN,
+ * and Normal-GFN. The VM never executes in this state
+ * in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ * The state of a memory slot mirrors the state of the
+ * VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ * A VM always starts in Normal Mode.
+ *
+ * H_SVM_INIT_START moves the VM into transient state. During this
+ * time the Ultravisor may request some of its GFNs to be shared or
+ * secured. So its GFNs can be in one of the three GFN states.
+ *
+ * H_SVM_INIT_DONE moves the VM entirely from transient state to
+ * secure-state. At this point any left-over normal-GFNs are
+ * transitioned to Secure-GFN.
+ *
+ * H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ * All its GFNs are moved to Normal-GFNs.
+ *
+ * UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ * the secure-GFN and shared-GFNs are transitioned to normal-GFN.
+ * Note: The contents of the normal-GFN is undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is paged-out. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_GFN_SHARED flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * | | Share | Unshare | SVM |H_SVM_INIT_DONE|
+ * | |operation |operation | abort/ | |
+ * | | | | terminate | |
+ * -------------------------------------------------------------
+ * | | | | | |
+ * | Secure | Shared | Secure |Normal |Secure |
+ * | | | | | |
+ * | Shared | Shared | Secure |Normal |Shared |
+ * | | | | | |
+ * | Normal | Shared | Secure |Normal |Secure |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ |
+ * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE |
+ * | | | | | | |
+ * --------- ----------------------------------------------------------
+ * | | | | | | |
+ * | Normal | Normal | Transient|Error |Error |Normal |
+ * | | | | | | |
+ * | Secure | Error | Error |Error |Error |Normal |
+ * | | | | | | |
+ * |Transient| N/A | Error |Secure |Normal |Normal |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN (1UL << 63)
+#define KVMPPC_GFN_MEM_PFN (1UL << 62)
+#define KVMPPC_GFN_SHARED (1UL << 61)
+#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
+#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
+#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK)
struct kvmppc_uvmem_slot {
struct list_head list;
@@ -106,11 +227,11 @@ struct kvmppc_uvmem_slot {
unsigned long base_pfn;
unsigned long *pfns;
};
-
struct kvmppc_uvmem_page_pvt {
struct kvm *kvm;
unsigned long gpa;
bool skip_page_out;
+ bool remove_gfn;
};
bool kvmppc_uvmem_available(void)
@@ -163,8 +284,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
mutex_unlock(&kvm->arch.uvmem_lock);
}
-static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
- struct kvm *kvm)
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+ unsigned long flag, unsigned long uvmem_pfn)
{
struct kvmppc_uvmem_slot *p;
@@ -172,24 +293,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
unsigned long index = gfn - p->base_pfn;
- p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
+ if (flag == KVMPPC_GFN_UVMEM_PFN)
+ p->pfns[index] = uvmem_pfn | flag;
+ else
+ p->pfns[index] = flag;
return;
}
}
}
-static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
+/* mark the GFN as secure-GFN associated with @uvmem_pfn device-PFN. */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+ unsigned long uvmem_pfn, struct kvm *kvm)
{
- struct kvmppc_uvmem_slot *p;
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
- list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
- if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
- p->pfns[gfn - p->base_pfn] = 0;
- return;
- }
- }
+/* mark the GFN as secure-GFN associated with a memory-PFN. */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
+}
+
+/* mark the GFN as a shared GFN. */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/* mark the GFN as a non-existent GFN. */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, 0, 0);
}
+/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
unsigned long *uvmem_pfn)
{
@@ -199,10 +337,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
unsigned long index = gfn - p->base_pfn;
- if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
+ if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
if (uvmem_pfn)
*uvmem_pfn = p->pfns[index] &
- ~KVMPPC_UVMEM_PFN;
+ KVMPPC_GFN_PFN_MASK;
return true;
} else
return false;
@@ -211,10 +349,114 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
return false;
}
+/*
+ * starting from *gfn search for the next available GFN that is not yet
+ * transitioned to a secure GFN. return the value of that GFN in *gfn. If a
+ * GFN is found, return true; return false when none is left in the slot or
+ * when no kvmppc_uvmem_slot on kvm->arch.uvmem_pfns covers *gfn.
+ * Must be called with kvm->arch.uvmem_lock held.
+ */
+static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+ struct kvm *kvm, unsigned long *gfn)
+{
+ struct kvmppc_uvmem_slot *p;
+ bool ret = false;
+ unsigned long i;
+
+ list_for_each_entry(p, &kvm->arch.uvmem_pfns, list)
+ if (*gfn >= p->base_pfn && *gfn < p->base_pfn + p->nr_pfns)
+ break;
+ if (&p->list == &kvm->arch.uvmem_pfns) /* walk ended on the list head (p is never NULL): no match */
+ return ret;
+ /*
+ * The code below assumes, one to one correspondence between
+ * kvmppc_uvmem_slot and memslot.
+ */
+ for (i = *gfn; i < p->base_pfn + p->nr_pfns; i++) {
+ unsigned long index = i - p->base_pfn;
+
+ if (!(p->pfns[index] & KVMPPC_GFN_FLAG_MASK)) {
+ *gfn = i;
+ ret = true;
+ break;
+ }
+ }
+ return ret;
+}
+
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot, bool merge)
+{
+ unsigned long gfn = memslot->base_gfn;
+ unsigned long end, start = gfn_to_hva(kvm, gfn);
+ int ret = 0;
+ struct vm_area_struct *vma;
+ int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+
+ if (kvm_is_error_hva(start))
+ return H_STATE;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+
+ mmap_write_lock(kvm->mm);
+ do {
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma) {
+ ret = H_STATE;
+ break;
+ }
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ merge_flag, &vma->vm_flags);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+ start = vma->vm_end;
+ } while (end > vma->vm_end);
+
+ mmap_write_unlock(kvm->mm);
+ return ret;
+}
+
+static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ kvmppc_uvmem_slot_free(kvm, memslot);
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+}
+
+static int __kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ int ret = H_PARAMETER;
+
+ if (kvmppc_memslot_page_merge(kvm, memslot, false))
+ return ret;
+
+ if (kvmppc_uvmem_slot_init(kvm, memslot))
+ goto out1;
+
+ ret = uv_register_mem_slot(kvm->arch.lpid,
+ memslot->base_gfn << PAGE_SHIFT,
+ memslot->npages * PAGE_SIZE,
+ 0, memslot->id);
+ if (ret < 0) {
+ ret = H_PARAMETER;
+ goto out;
+ }
+ return 0;
+out:
+ kvmppc_uvmem_slot_free(kvm, memslot);
+out1:
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+ return ret;
+}
+
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
{
struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
+ struct kvm_memory_slot *memslot, *m;
int ret = H_SUCCESS;
int srcu_idx;
@@ -232,35 +474,117 @@ unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
return H_AUTHORITY;
srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ /* register the memslot */
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
- if (kvmppc_uvmem_slot_init(kvm, memslot)) {
- ret = H_PARAMETER;
- goto out;
- }
- ret = uv_register_mem_slot(kvm->arch.lpid,
- memslot->base_gfn << PAGE_SHIFT,
- memslot->npages * PAGE_SIZE,
- 0, memslot->id);
- if (ret < 0) {
- kvmppc_uvmem_slot_free(kvm, memslot);
- ret = H_PARAMETER;
- goto out;
+ ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
+ if (ret)
+ break;
+ }
+
+ if (ret) { /* registration failed: roll back every slot registered before the failing one */
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(m, slots) {
+ if (m == memslot) /* the failing slot was already undone by __kvmppc_uvmem_memslot_create() */
+ break;
+ __kvmppc_uvmem_memslot_delete(kvm, m);
}
}
-out:
+
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
-unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall.
+ * Caller must hold kvm->arch.uvmem_lock.
+ */
+static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa)
{
- if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
- return H_UNSUPPORTED;
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig;
+ struct page *dpage, *spage;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ int ret = U_SUCCESS;
- kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
- pr_info("LPID %d went secure\n", kvm->arch.lpid);
- return H_SUCCESS;
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+ mig.pgmap_owner = &kvmppc_uvmem_pgmap;
+ mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+
+ /* The requested page is already paged-out, nothing to do */
+ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+ return ret;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return -1;
+
+ spage = migrate_pfn_to_page(*mig.src);
+ if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+ goto out_finalize;
+
+ if (!is_zone_device_page(spage))
+ goto out_finalize;
+
+ dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ lock_page(dpage);
+ pvt = spage->zone_device_data;
+ pfn = page_to_pfn(dpage);
+
+ /*
+ * This function is used in two cases:
+ * - When HV touches a secure page, for which we do UV_PAGE_OUT
+ * - When a secure page is converted to shared page, we *get*
+ * the page to essentially unmap the device page. In this
+ * case we skip page-out.
+ */
+ if (!pvt->skip_page_out)
+ ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+
+ if (ret == U_SUCCESS)
+ *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
+ else {
+ unlock_page(dpage);
+ __free_page(dpage);
+ goto out_finalize;
+ }
+
+ migrate_vma_pages(&mig);
+
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa)
+{
+ int ret;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ return ret;
}
/*
@@ -271,33 +595,53 @@ unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
* fault on them, do fault time migration to replace the device PTEs in
* QEMU page table with normal PTEs from newly allocated pages.
*/
-void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
struct kvm *kvm, bool skip_page_out)
{
int i;
struct kvmppc_uvmem_page_pvt *pvt;
- unsigned long pfn, uvmem_pfn;
- unsigned long gfn = free->base_gfn;
+ struct page *uvmem_page;
+ struct vm_area_struct *vma = NULL;
+ unsigned long uvmem_pfn, gfn;
+ unsigned long addr;
- for (i = free->npages; i; --i, ++gfn) {
- struct page *uvmem_page;
+ mmap_read_lock(kvm->mm);
+
+ addr = slot->userspace_addr;
+
+ gfn = slot->base_gfn;
+ for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) {
+
+ /* Fetch the VMA if addr is not in the latest fetched one */
+ if (!vma || addr >= vma->vm_end) {
+ vma = find_vma_intersection(kvm->mm, addr, addr+1);
+ if (!vma) {
+ pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
+ break;
+ }
+ }
mutex_lock(&kvm->arch.uvmem_lock);
- if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
- mutex_unlock(&kvm->arch.uvmem_lock);
- continue;
+
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = skip_page_out;
+ pvt->remove_gfn = true;
+
+ if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE,
+ PAGE_SHIFT, kvm, pvt->gpa))
+ pr_err("Can't page out gpa:0x%lx addr:0x%lx\n",
+ pvt->gpa, addr);
+ } else {
+ /* Remove the shared flag if any */
+ kvmppc_gfn_remove(gfn, kvm);
}
- uvmem_page = pfn_to_page(uvmem_pfn);
- pvt = uvmem_page->zone_device_data;
- pvt->skip_page_out = skip_page_out;
mutex_unlock(&kvm->arch.uvmem_lock);
-
- pfn = gfn_to_pfn(kvm, gfn);
- if (is_error_noslot_pfn(pfn))
- continue;
- kvm_release_pfn_clean(pfn);
}
+
+ mmap_read_unlock(kvm->mm);
}
unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
@@ -360,7 +704,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
goto out_clear;
uvmem_pfn = bit + pfn_first;
- kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+ kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
pvt->gpa = gpa;
pvt->kvm = kvm;
@@ -379,13 +723,14 @@ out:
}
/*
- * Alloc a PFN from private device memory pool and copy page from normal
- * memory to secure memory using UV_PAGE_IN uvcall.
+ * Alloc a PFN from private device memory pool. If @pagein is true,
+ * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
*/
-static int
-kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, unsigned long gpa, struct kvm *kvm,
- unsigned long page_shift, bool *downgrade)
+static int kvmppc_svm_page_in(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long gpa, struct kvm *kvm,
+ unsigned long page_shift,
+ bool pagein)
{
unsigned long src_pfn, dst_pfn = 0;
struct migrate_vma mig;
@@ -402,18 +747,6 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
mig.dst = &dst_pfn;
mig.flags = MIGRATE_VMA_SELECT_SYSTEM;
- /*
- * We come here with mmap_lock write lock held just for
- * ksm_madvise(), otherwise we only need read mmap_lock.
- * Hence downgrade to read lock once ksm_madvise() is done.
- */
- ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags);
- mmap_write_downgrade(kvm->mm);
- *downgrade = true;
- if (ret)
- return ret;
-
ret = migrate_vma_setup(&mig);
if (ret)
return ret;
@@ -429,11 +762,16 @@ kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
goto out_finalize;
}
- pfn = *mig.src >> MIGRATE_PFN_SHIFT;
- spage = migrate_pfn_to_page(*mig.src);
- if (spage)
- uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
- page_shift);
+ if (pagein) {
+ pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+ spage = migrate_pfn_to_page(*mig.src);
+ if (spage) {
+ ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+ if (ret)
+ goto out_finalize;
+ }
+ }
*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
migrate_vma_pages(&mig);
@@ -442,6 +780,80 @@ out_finalize:
return ret;
}
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ unsigned long gfn = memslot->base_gfn;
+ struct vm_area_struct *vma;
+ unsigned long start, end;
+ int ret = 0;
+
+ mmap_read_lock(kvm->mm);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
+ ret = H_STATE;
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ break;
+
+ end = start + (1UL << PAGE_SHIFT);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ break;
+
+ ret = kvmppc_svm_page_in(vma, start, end,
+ (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+
+ /* relinquish the cpu if needed */
+ cond_resched();
+ }
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ mmap_read_unlock(kvm->mm);
+ return ret;
+}
+
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+ long ret = H_SUCCESS;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ /* migrate any unmoved normal pfn to device pfns*/
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, slots) {
+ ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+ if (ret) {
+ /*
+ * The pages will remain transitioned.
+ * It's the caller's responsibility to
+ * terminate the VM, which will undo
+ * all state of the VM. Till then
+ * this VM is in an erroneous state.
+ * Its KVMPPC_SECURE_INIT_DONE will
+ * remain unset.
+ */
+ ret = H_STATE;
+ goto out;
+ }
+ }
+
+ kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+ pr_info("LPID %d went secure\n", kvm->arch.lpid);
+
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
/*
* Shares the page with HV, thus making it a normal page.
*
@@ -451,8 +863,8 @@ out_finalize:
* In the former case, uses dev_pagemap_ops.migrate_to_ram handler
* to unmap the device page from QEMU's page tables.
*/
-static unsigned long
-kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+ unsigned long page_shift)
{
int ret = H_PARAMETER;
@@ -469,6 +881,11 @@ kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
uvmem_page = pfn_to_page(uvmem_pfn);
pvt = uvmem_page->zone_device_data;
pvt->skip_page_out = true;
+ /*
+ * do not drop the GFN. It is a valid GFN
+ * that is transitioned to a shared GFN.
+ */
+ pvt->remove_gfn = false;
}
retry:
@@ -482,12 +899,16 @@ retry:
uvmem_page = pfn_to_page(uvmem_pfn);
pvt = uvmem_page->zone_device_data;
pvt->skip_page_out = true;
+ pvt->remove_gfn = false; /* it continues to be a valid GFN */
kvm_release_pfn_clean(pfn);
goto retry;
}
- if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
+ if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
+ page_shift)) {
+ kvmppc_gfn_shared(gfn, kvm);
ret = H_SUCCESS;
+ }
kvm_release_pfn_clean(pfn);
mutex_unlock(&kvm->arch.uvmem_lock);
out:
@@ -501,11 +922,10 @@ out:
* H_PAGE_IN_SHARED flag makes the page shared which means that the same
* memory in is visible from both UV and HV.
*/
-unsigned long
-kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
- unsigned long flags, unsigned long page_shift)
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags,
+ unsigned long page_shift)
{
- bool downgrade = false;
unsigned long start, end;
struct vm_area_struct *vma;
int srcu_idx;
@@ -526,7 +946,7 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
ret = H_PARAMETER;
srcu_idx = srcu_read_lock(&kvm->srcu);
- mmap_write_lock(kvm->mm);
+ mmap_read_lock(kvm->mm);
start = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(start))
@@ -542,97 +962,20 @@ kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
if (!vma || vma->vm_start > start || vma->vm_end < end)
goto out_unlock;
- if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
- &downgrade))
- ret = H_SUCCESS;
+ if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+ true))
+ goto out_unlock;
+
+ ret = H_SUCCESS;
+
out_unlock:
mutex_unlock(&kvm->arch.uvmem_lock);
out:
- if (downgrade)
- mmap_read_unlock(kvm->mm);
- else
- mmap_write_unlock(kvm->mm);
+ mmap_read_unlock(kvm->mm);
srcu_read_unlock(&kvm->srcu, srcu_idx);
return ret;
}
-/*
- * Provision a new page on HV side and copy over the contents
- * from secure memory using UV_PAGE_OUT uvcall.
- */
-static int
-kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, unsigned long page_shift,
- struct kvm *kvm, unsigned long gpa)
-{
- unsigned long src_pfn, dst_pfn = 0;
- struct migrate_vma mig;
- struct page *dpage, *spage;
- struct kvmppc_uvmem_page_pvt *pvt;
- unsigned long pfn;
- int ret = U_SUCCESS;
-
- memset(&mig, 0, sizeof(mig));
- mig.vma = vma;
- mig.start = start;
- mig.end = end;
- mig.src = &src_pfn;
- mig.dst = &dst_pfn;
- mig.pgmap_owner = &kvmppc_uvmem_pgmap;
- mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
-
- mutex_lock(&kvm->arch.uvmem_lock);
- /* The requested page is already paged-out, nothing to do */
- if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
- goto out;
-
- ret = migrate_vma_setup(&mig);
- if (ret)
- goto out;
-
- spage = migrate_pfn_to_page(*mig.src);
- if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
- goto out_finalize;
-
- if (!is_zone_device_page(spage))
- goto out_finalize;
-
- dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
- if (!dpage) {
- ret = -1;
- goto out_finalize;
- }
-
- lock_page(dpage);
- pvt = spage->zone_device_data;
- pfn = page_to_pfn(dpage);
-
- /*
- * This function is used in two cases:
- * - When HV touches a secure page, for which we do UV_PAGE_OUT
- * - When a secure page is converted to shared page, we *get*
- * the page to essentially unmap the device page. In this
- * case we skip page-out.
- */
- if (!pvt->skip_page_out)
- ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
- gpa, 0, page_shift);
-
- if (ret == U_SUCCESS)
- *mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
- else {
- unlock_page(dpage);
- __free_page(dpage);
- goto out_finalize;
- }
-
- migrate_vma_pages(&mig);
-out_finalize:
- migrate_vma_finalize(&mig);
-out:
- mutex_unlock(&kvm->arch.uvmem_lock);
- return ret;
-}
/*
* Fault handler callback that gets called when HV touches any page that
@@ -657,7 +1000,8 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
/*
* Release the device PFN back to the pool
*
- * Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT.
+ * Gets called when secure GFN transitions from a secure-PFN
+ * to a normal PFN during H_SVM_PAGE_OUT.
* Gets called with kvm->arch.uvmem_lock held.
*/
static void kvmppc_uvmem_page_free(struct page *page)
@@ -672,7 +1016,10 @@ static void kvmppc_uvmem_page_free(struct page *page)
pvt = page->zone_device_data;
page->zone_device_data = NULL;
- kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ if (pvt->remove_gfn)
+ kvmppc_gfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
+ else
+ kvmppc_gfn_secure_mem_pfn(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
kfree(pvt);
}
@@ -744,6 +1091,21 @@ out:
return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
}
+int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new)
+{
+ int ret = __kvmppc_uvmem_memslot_create(kvm, new);
+
+ if (!ret)
+ ret = kvmppc_uv_migrate_mem_slot(kvm, new);
+
+ return ret;
+}
+
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old)
+{
+ __kvmppc_uvmem_memslot_delete(kvm, old);
+}
+
static u64 kvmppc_get_secmem_size(void)
{
struct device_node *np;
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 607a9b99c334..25a3679fb590 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -55,8 +55,7 @@
****************************************************************************/
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
@@ -68,8 +67,8 @@ kvm_start_entry:
/* Save host state to the stack */
PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
- /* Save r3 (kvm_run) and r4 (vcpu) */
- SAVE_2GPRS(3, r1)
+ /* Save r3 (vcpu) */
+ SAVE_GPR(3, r1)
/* Save non-volatile registers (r14 - r31) */
SAVE_NVGPRS(r1)
@@ -82,47 +81,46 @@ kvm_start_entry:
PPC_STL r0, _LINK(r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
- mr r3, r4
bl FUNC(kvmppc_copy_to_svcpu)
nop
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
#ifdef CONFIG_PPC_BOOK3S_64
/* Get the dcbz32 flag */
- PPC_LL r3, VCPU_HFLAGS(r4)
- rldicl r3, r3, 0, 63 /* r3 &= 1 */
- stb r3, HSTATE_RESTORE_HID5(r13)
+ PPC_LL r0, VCPU_HFLAGS(r3)
+ rldicl r0, r0, 0, 63 /* r0 &= 1 */
+ stb r0, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
- lwz r3, VCPU_SHAREDBE(r4)
- cmpwi r3, 0
- ld r5, VCPU_SHARED(r4)
+ lbz r4, VCPU_SHAREDBE(r3)
+ cmpwi r4, 0
+ ld r5, VCPU_SHARED(r3)
beq sprg3_little_endian
sprg3_big_endian:
#ifdef __BIG_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
b after_sprg3_load
sprg3_little_endian:
#ifdef __LITTLE_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
after_sprg3_load:
- mtspr SPRN_SPRG3, r3
+ mtspr SPRN_SPRG3, r4
#endif /* CONFIG_PPC_BOOK3S_64 */
- PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
+ PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
bl FUNC(kvmppc_entry_trampoline)
@@ -146,7 +144,7 @@ after_sprg3_load:
*
*/
- PPC_LL r3, GPR4(r1) /* vcpu pointer */
+ PPC_LL r3, GPR3(r1) /* vcpu pointer */
/*
* kvmppc_copy_from_svcpu can clobber volatile registers, save
@@ -169,7 +167,7 @@ after_sprg3_load:
#endif /* CONFIG_PPC_BOOK3S_64 */
/* R7 = vcpu */
- PPC_LL r7, GPR4(r1)
+ PPC_LL r7, GPR3(r1)
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
@@ -190,11 +188,11 @@ after_sprg3_load:
PPC_STL r30, VCPU_GPR(R30)(r7)
PPC_STL r31, VCPU_GPR(R31)(r7)
- /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
- lwz r5, VCPU_TRAP(r7)
+ /* Pass the exit number as 2nd argument to kvmppc_handle_exit */
+ lwz r4, VCPU_TRAP(r7)
- /* Restore r3 (kvm_run) and r4 (vcpu) */
- REST_2GPRS(3, r1)
+ /* Restore r3 (vcpu) */
+ REST_GPR(3, r1)
bl FUNC(kvmppc_handle_exit_pr)
/* If RESUME_GUEST, get back in the loop */
@@ -223,11 +221,11 @@ kvm_loop_heavyweight:
PPC_LL r4, _LINK(r1)
PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1)
- /* Load vcpu and cpu_run */
- REST_2GPRS(3, r1)
+ /* Load vcpu */
+ REST_GPR(3, r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
@@ -235,7 +233,7 @@ kvm_loop_heavyweight:
kvm_loop_lightweight:
/* We'll need the vcpu pointer */
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ed12dfbf9bb5..88fac22fbf09 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1151,9 +1151,9 @@ static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
return r;
}
-int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
@@ -1826,12 +1826,11 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu->run;
int ret;
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = -EINVAL;
goto out;
}
@@ -1858,7 +1857,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
kvmppc_clear_debug(vcpu);
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 26b25994c969..c5e677508d3b 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -229,7 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
*/
args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (rc)
goto fail;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index c0d62a917e20..3e1c9f08e302 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -731,12 +731,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu->run;
int ret, s;
struct debug_reg debug;
if (!vcpu->arch.sane) {
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
@@ -778,7 +777,7 @@ int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
/* No need for guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */
@@ -982,9 +981,9 @@ static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu,
*
* Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
*/
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
int idx;
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 2e56ab5a5f55..6fa82efe833b 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -237,7 +237,7 @@ _GLOBAL(kvmppc_resume_host)
/* Switch to kernel stack and jump to handler. */
LOAD_REG_ADDR(r3, kvmppc_handle_exit)
mtctr r3
- lwz r3, HOST_RUN(r1)
+ mr r3, r4
lwz r2, HOST_R2(r1)
mr r14, r4 /* Save vcpu pointer. */
@@ -337,15 +337,14 @@ heavyweight_exit:
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- stw r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
stw r3, HOST_STACK_LR(r1)
mfcr r5
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index c577ba4b3169..8262c14fc9e6 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -434,9 +434,10 @@ _GLOBAL(kvmppc_resume_host)
#endif
/* Switch to kernel stack and jump to handler. */
- PPC_LL r3, HOST_RUN(r1)
+ mr r3, r4
mr r5, r14 /* intno */
mr r14, r4 /* Save vcpu pointer. */
+ mr r4, r5
bl kvmppc_handle_exit
/* Restore vcpu pointer and the nonvolatiles we used. */
@@ -525,15 +526,14 @@ heavyweight_exit:
blr
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- PPC_STL r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
mfcr r5
PPC_STL r3, HOST_STACK_LR(r1)
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index d6c1069e9954..ed0c9c43d0cf 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -734,7 +734,8 @@ static int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
+ unsigned flags)
{
/* kvm_unmap_hva flushes everything anyways */
kvm_unmap_hva(kvm, start);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index aaa7b62f2f82..13999123b735 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -403,7 +403,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
return EMULATE_DONE;
}
- if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size))
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+ if (rc)
return EMULATE_DO_MMIO;
return EMULATE_DONE;
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index c0162911f6cb..d426eaf76bb0 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -191,10 +191,17 @@ static bool is_module_segment(unsigned long addr)
{
if (!IS_ENABLED(CONFIG_MODULES))
return false;
+#ifdef MODULES_VADDR
+ if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
+ return false;
+ if (addr > ALIGN(MODULES_END, SZ_256M) - 1)
+ return false;
+#else
if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
return false;
- if (addr >= ALIGN(VMALLOC_END, SZ_256M))
+ if (addr > ALIGN(VMALLOC_END, SZ_256M) - 1)
return false;
+#endif
return true;
}
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 1478fceeb683..c663e7ba801f 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -232,6 +232,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= HPTE_R_I;
else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
rflags |= (HPTE_R_I | HPTE_R_G);
+ else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+ rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
else
/*
* Add memory coherence if cache inhibited is not set
@@ -1116,7 +1118,8 @@ void hash__early_init_mmu_secondary(void)
tlbiel_all();
#ifdef CONFIG_PPC_MEM_KEYS
- mtspr(SPRN_UAMOR, default_uamor);
+ if (mmu_has_feature(MMU_FTR_PKEY))
+ mtspr(SPRN_UAMOR, default_uamor);
#endif
}
#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 69a6b87f2bb4..b1d091a97611 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -73,12 +73,6 @@ static int scan_pkey_feature(void)
if (early_radix_enabled())
return 0;
- /*
- * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
- */
- if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
- return 0;
-
ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total);
if (ret == 0) {
/*
@@ -124,6 +118,12 @@ void __init pkey_early_init_devtree(void)
__builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
!= (sizeof(u64) * BITS_PER_BYTE));
+ /*
+ * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
+ */
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
+ return;
+
/* scan the device tree for pkey feature */
pkeys_total = scan_pkey_feature();
if (!pkeys_total)
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index b83abbead4a2..8acd00178956 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
}
ret = 0;
- *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+ *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
if (unlikely(*flt & VM_FAULT_ERROR)) {
if (*flt & VM_FAULT_OOM) {
ret = -ENOMEM;
@@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
BUG();
}
- if (*flt & VM_FAULT_MAJOR)
- current->maj_flt++;
- else
- current->min_flt++;
-
out_unlock:
mmap_read_unlock(mm);
return ret;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 925a7231abb3..0add963a849b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -511,7 +511,7 @@ retry:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags, regs);
major |= fault & VM_FAULT_MAJOR;
@@ -537,14 +537,9 @@ retry:
/*
* Major/minor page fault accounting.
*/
- if (major) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+ if (major)
cmo_account_page_fault();
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
- }
+
return 0;
}
NOKPROBE_SYMBOL(__do_page_fault);
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 16d09b36fe06..78d61f97371e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -475,7 +475,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP | BPF_JSET | BPF_X:
true_cond = COND_NE;
- /* Fall through */
+ fallthrough;
cond_branch:
/* same targets, can avoid doing the test :) */
if (filter[i].jt == filter[i].jf) {
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 78fe34986594..08643cba1494 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1557,9 +1557,16 @@ nocheck:
ret = 0;
out:
if (has_branch_stack(event)) {
- power_pmu_bhrb_enable(event);
- cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
- event->attr.branch_sample_type);
+ u64 bhrb_filter = -1;
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
+ event->attr.branch_sample_type);
+
+ if (bhrb_filter != -1) {
+ cpuhw->bhrb_filter = bhrb_filter;
+ power_pmu_bhrb_enable(event);
+ }
}
perf_pmu_enable(event->pmu);
@@ -1881,7 +1888,6 @@ static int power_pmu_event_init(struct perf_event *event)
int n;
int err;
struct cpu_hw_events *cpuhw;
- u64 bhrb_filter;
if (!ppmu)
return -ENOENT;
@@ -1987,7 +1993,10 @@ static int power_pmu_event_init(struct perf_event *event)
err = power_check_constraints(cpuhw, events, cflags, n + 1);
if (has_branch_stack(event)) {
- bhrb_filter = ppmu->bhrb_filter_map(
+ u64 bhrb_filter = -1;
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
if (bhrb_filter == -1) {
@@ -2141,6 +2150,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
+ } else if (period) {
+ /* Account for interrupt in case of invalid SIAR */
+ if (perf_event_account_interrupt(event))
+ power_pmu_stop(event, 0);
}
}
@@ -2323,6 +2336,7 @@ int register_power_pmu(struct power_pmu *pmu)
pmu->name);
power_pmu.attr_groups = ppmu->attr_groups;
+ power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
#ifdef MSR_HV
/*
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index cdb7bfbd157e..6e7e820508df 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -1128,6 +1128,15 @@ static struct bin_attribute *if_bin_attrs[] = {
NULL,
};
+static struct attribute *cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = cpumask_attrs,
+};
+
static struct attribute *if_attrs[] = {
&dev_attr_catalog_len.attr,
&dev_attr_catalog_version.attr,
@@ -1135,7 +1144,6 @@ static struct attribute *if_attrs[] = {
&dev_attr_sockets.attr,
&dev_attr_chipspersocket.attr,
&dev_attr_coresperchip.attr,
- &dev_attr_cpumask.attr,
NULL,
};
@@ -1151,6 +1159,7 @@ static const struct attribute_group *attr_groups[] = {
&event_desc_group,
&event_long_desc_group,
&if_group,
+ &cpumask_attr_group,
NULL,
};
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index a45d694a5d5d..62d0b54086f8 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1289,7 +1289,7 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem,
header->misc = 0;
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
- switch (IMC_TRACE_RECORD_VAL_HVPR(mem->val)) {
+ switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) {
case 0:/* when MSR HV and PR not set in the trace-record */
header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
break;
@@ -1297,7 +1297,7 @@ static int trace_imc_prepare_sample(struct trace_imc_data *mem,
header->misc |= PERF_RECORD_MISC_GUEST_USER;
break;
case 2: /* MSR HV is 1 and PR is 0 */
- header->misc |= PERF_RECORD_MISC_HYPERVISOR;
+ header->misc |= PERF_RECORD_MISC_KERNEL;
break;
case 3: /* MSR HV is 1 and PR is 1 */
header->misc |= PERF_RECORD_MISC_USER;
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index a213a0aa5d25..8e53f2fc3fe0 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -13,9 +13,11 @@
#include <asm/ptrace.h>
#include <asm/perf_regs.h>
+u64 PERF_REG_EXTENDED_MASK;
+
#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
-#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
@@ -69,10 +71,36 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
};
+/* Function to return the extended register values */
+static u64 get_ext_regs_value(int idx)
+{
+ switch (idx) {
+ case PERF_REG_POWERPC_MMCR0:
+ return mfspr(SPRN_MMCR0);
+ case PERF_REG_POWERPC_MMCR1:
+ return mfspr(SPRN_MMCR1);
+ case PERF_REG_POWERPC_MMCR2:
+ return mfspr(SPRN_MMCR2);
+#ifdef CONFIG_PPC64
+ case PERF_REG_POWERPC_MMCR3:
+ return mfspr(SPRN_MMCR3);
+ case PERF_REG_POWERPC_SIER2:
+ return mfspr(SPRN_SIER2);
+ case PERF_REG_POWERPC_SIER3:
+ return mfspr(SPRN_SIER3);
+#endif
+ default: return 0;
+ }
+}
+
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
- if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
- return 0;
+ u64 perf_reg_extended_max = PERF_REG_POWERPC_MAX;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ perf_reg_extended_max = PERF_REG_MAX_ISA_31;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
+ perf_reg_extended_max = PERF_REG_MAX_ISA_300;
if (idx == PERF_REG_POWERPC_SIER &&
(IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
@@ -85,6 +113,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
IS_ENABLED(CONFIG_PPC32)))
return 0;
+ if (idx >= PERF_REG_POWERPC_MAX && idx < perf_reg_extended_max)
+ return get_ext_regs_value(idx);
+
+ /*
+ * If idx refers to a value beyond the
+ * supported registers, return 0 with a warning
+ */
+ if (WARN_ON_ONCE(idx >= perf_reg_extended_max))
+ return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
}
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index f7cff7f36a1c..83148656b524 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -87,6 +87,8 @@
#define POWER10_MMCRA_IFM3 0x00000000C0000000UL
#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL
+extern u64 PERF_REG_EXTENDED_MASK;
+
/* Table of alternatives, sorted by column 0 */
static const unsigned int power10_event_alternatives[][MAX_ALT] = {
{ PM_RUN_CYC_ALT, PM_RUN_CYC },
@@ -397,6 +399,7 @@ static struct power_pmu power10_pmu = {
.cache_events = &power10_cache_events,
.attr_groups = power10_pmu_attr_groups,
.bhrb_nr = 32,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS,
};
int init_power10_pmu(void)
@@ -408,6 +411,9 @@ int init_power10_pmu(void)
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10"))
return -ENODEV;
+ /* Set the PERF_REG_EXTENDED_MASK here */
+ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+
rc = register_power_pmu(&power10_pmu);
if (rc)
return rc;
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 05dae38b969a..2a57e93a79dc 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -90,6 +90,8 @@ enum {
#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL
+extern u64 PERF_REG_EXTENDED_MASK;
+
/* Nasty Power9 specific hack */
#define PVR_POWER9_CUMULUS 0x00002000
@@ -434,6 +436,7 @@ static struct power_pmu power9_pmu = {
.cache_events = &power9_cache_events,
.attr_groups = power9_pmu_attr_groups,
.bhrb_nr = 32,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS,
};
int init_power9_pmu(void)
@@ -457,6 +460,9 @@ int init_power9_pmu(void)
}
}
+ /* Set the PERF_REG_EXTENDED_MASK here */
+ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;
+
rc = register_power_pmu(&power9_pmu);
if (rc)
return rc;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 87737ec86d39..1dc9d3c81872 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx
select PPC_HAVE_PMU_SUPPORT
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
- select HAVE_ARCH_VMAP_STACK
+ select HAVE_ARCH_VMAP_STACK if !ADB_PMU
config PPC_BOOK3S_601
bool "PowerPC 601"
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 77513a80cef9..345ab062b21a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -1223,7 +1223,7 @@ static void __init pnv_probe_idle_states(void)
return;
}
- if (pvr_version_is(PVR_POWER9))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
pnv_power9_idle_init();
for (i = 0; i < nr_pnv_idle_states; i++)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index c9c25fb0783c..023a4f987bb2 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2705,7 +2705,7 @@ void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
struct iommu_table *tbl = pe->table_group.tables[0];
int64_t rc;
- if (pe->dma_setup_done)
+ if (!pe->dma_setup_done)
return;
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index c6e0d8abf75e..7a974ed6b240 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -107,22 +107,28 @@ static int pseries_cpu_disable(void)
*/
static void pseries_cpu_die(unsigned int cpu)
{
- int tries;
int cpu_status = 1;
unsigned int pcpu = get_hard_smp_processor_id(cpu);
+ unsigned long timeout = jiffies + msecs_to_jiffies(120000);
- for (tries = 0; tries < 25; tries++) {
+ while (true) {
cpu_status = smp_query_cpu_stopped(pcpu);
if (cpu_status == QCSS_STOPPED ||
cpu_status == QCSS_HARDWARE_ERROR)
break;
- cpu_relax();
+ if (time_after(jiffies, timeout)) {
+ pr_warn("CPU %i (hwid %i) didn't die after 120 seconds\n",
+ cpu, pcpu);
+ timeout = jiffies + msecs_to_jiffies(120000);
+ }
+
+ cond_resched();
}
- if (cpu_status != 0) {
- printk("Querying DEAD? cpu %i (%i) shows %i\n",
- cpu, pcpu, cpu_status);
+ if (cpu_status == QCSS_HARDWARE_ERROR) {
+ pr_warn("CPU %i (hwid %i) reported error while dying\n",
+ cpu, pcpu);
}
/* Isolation and deallocation are definitely done by
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index f3736fcd98fc..13c86a292c6d 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -184,7 +184,6 @@ static void handle_system_shutdown(char event_modifier)
case EPOW_SHUTDOWN_ON_UPS:
pr_emerg("Loss of system power detected. System is running on"
" UPS/battery. Check RTAS error log for details\n");
- orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: