aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorMichael Ellerman <mpe@ellerman.id.au>2021-06-17 16:51:38 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2021-06-17 16:51:38 +1000
commit3c53642324f526c0aba411bf8e6cf2ab2471192a (patch)
treef603ab1fa6c3236618f721806327d4f21a8a1f83
parentpowerpc/mm/book3s64: Fix possible build error (diff)
parentKVM: PPC: Book3S HV: remove ISA v3.0 and v3.1 support from P7/8 path (diff)
downloadwireguard-linux-3c53642324f526c0aba411bf8e6cf2ab2471192a.tar.xz
wireguard-linux-3c53642324f526c0aba411bf8e6cf2ab2471192a.zip
Merge branch 'topic/ppc-kvm' into next
Merge some powerpc KVM patches from our topic branch. In particular this brings in Nick's big series rewriting parts of the guest entry/exit path in C. Conflicts: arch/powerpc/kernel/security.c arch/powerpc/kvm/book3s_hv_rmhandlers.S
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h3
-rw-r--r--arch/powerpc/include/asm/exception-64s.h13
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h8
-rw-r--r--arch/powerpc/include/asm/kvm_host.h8
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h18
-rw-r--r--arch/powerpc/include/asm/mmu_context.h6
-rw-r--r--arch/powerpc/include/asm/time.h12
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S250
-rw-r--r--arch/powerpc/kernel/security.c5
-rw-r--r--arch/powerpc/kernel/time.c10
-rw-r--r--arch/powerpc/kvm/Makefile2
-rw-r--r--arch/powerpc/kvm/book3s.c17
-rw-r--r--arch/powerpc/kvm/book3s_64_entry.S416
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c12
-rw-r--r--arch/powerpc/kvm/book3s_hv.c696
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c135
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_p9_entry.c508
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c12
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c15
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S684
-rw-r--r--arch/powerpc/kvm/book3s_segment.S3
-rw-r--r--arch/powerpc/kvm/book3s_xive.c113
-rw-r--r--arch/powerpc/kvm/book3s_xive.h7
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c10
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c27
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c46
-rw-r--r--arch/powerpc/mm/mmu_context.c4
-rw-r--r--arch/powerpc/platforms/powernv/idle.c52
31 files changed, 1586 insertions, 1517 deletions
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1c7b75834e04..02ee6f5ac9fe 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -120,6 +120,7 @@ extern s32 patch__call_flush_branch_caches3;
extern s32 patch__flush_count_cache_return;
extern s32 patch__flush_link_stack_return;
extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_branch_caches;
@@ -140,7 +141,7 @@ void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index c1a8aac01cf9..bb6f78fcf981 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -35,6 +35,19 @@
/* PACA save area size in u64 units (exgen, exmc, etc) */
#define EX_SIZE 10
+/* PACA save area offsets */
+#define EX_R9 0
+#define EX_R10 8
+#define EX_R11 16
+#define EX_R12 24
+#define EX_R13 32
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#define EX_CTR 72
+
/*
* maximum recursive depth of MCE exceptions
*/
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a3633560493b..fbbf3cec92e9 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -147,6 +147,7 @@
#define KVM_GUEST_MODE_SKIP 2
#define KVM_GUEST_MODE_GUEST_HV 3
#define KVM_GUEST_MODE_HOST_HV 4
+#define KVM_GUEST_MODE_HV_P9 5 /* ISA >= v3.0 path */
#define KVM_INST_FETCH_FAILED -1
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53..eaf3a562bf1e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,10 +153,18 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
+
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an errata.
+ */
+#define HDSISR_CANARY 0x7fff
+
+/*
* We use a lock bit in HPTE dword 0 to synchronize updates and
* accesses to each HPTE, and another bit to indicate non-present
* HPTEs.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1e83359f286b..6904ce9e8190 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -297,7 +297,6 @@ struct kvm_arch {
u8 fwnmi_enabled;
u8 secure_guest;
u8 svm_enabled;
- bool threads_indep;
bool nested_enable;
bool dawr1_enabled;
pgd_t *pgtable;
@@ -683,7 +682,12 @@ struct kvm_vcpu_arch {
ulong fault_dar;
u32 fault_dsisr;
unsigned long intr_msr;
- ulong fault_gpa; /* guest real address of page fault (POWER9) */
+ /*
+ * POWER9 and later: fault_gpa contains the guest real address of page
+ * fault for a radix guest, or segment descriptor (equivalent to result
+ * from slbmfev of SLB entry that translated the EA) for hash guests.
+ */
+ ulong fault_gpa;
#endif
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5bf8ae9bb2cc..2d88944f9f34 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+ { return 0; }
#endif
#ifdef CONFIG_KVM_XIVE
@@ -655,8 +659,6 @@ extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
-extern void kvmppc_xive_init_module(void);
-extern void kvmppc_xive_exit_module(void);
extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
@@ -671,6 +673,8 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{
@@ -680,8 +684,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
-extern void kvmppc_xive_native_init_module(void);
-extern void kvmppc_xive_native_exit_module(void);
extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
@@ -695,8 +697,6 @@ static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority) { return -1; }
static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
-static inline void kvmppc_xive_init_module(void) { }
-static inline void kvmppc_xive_exit_module(void) { }
static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
@@ -711,14 +711,14 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
-static inline void kvmppc_xive_native_init_module(void) { }
-static inline void kvmppc_xive_native_exit_module(void) { }
static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val)
{ return 0; }
@@ -754,7 +754,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long tce_value, unsigned long npages);
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count);
-long kvmppc_h_random(struct kvm_vcpu *vcpu);
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
void kvmhv_commence_exit(int trap);
void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index ef6df2681582..1450ddf95691 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -121,12 +121,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
}
#endif
-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8dd3cdb25338..8c2c3dd4ddba 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
extern void secondary_cpu_time_init(void);
extern void __init time_init(void);
+#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
+ return x;
+}
+#endif
+
DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8599cf4f799c..31881ef3641d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -473,7 +473,6 @@ int main(void)
OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
- OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index fa8e52a0239e..f7fc6e078d4e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,22 +21,6 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9 0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_DAR 40
-#define EX_DSISR 48
-#define EX_CCR 52
-#define EX_CFAR 56
-#define EX_PPR 64
-#define EX_CTR 72
-.if EX_SIZE != 10
- .error "EX_SIZE is wrong"
-.endif
-
/*
* Following are fixed section helper macros.
*
@@ -133,7 +117,6 @@ name:
#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
-#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */
#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
@@ -190,9 +173,6 @@ do_define_int n
.ifndef IMASK
IMASK=0
.endif
- .ifndef IKVM_SKIP
- IKVM_SKIP=0
- .endif
.ifndef IKVM_REAL
IKVM_REAL=0
.endif
@@ -207,8 +187,6 @@ do_define_int n
.endif
.endm
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* All interrupts which set HSRR registers, as well as SRESET and MCE and
* syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -238,88 +216,28 @@ do_define_int n
/*
* If an interrupt is taken while a guest is running, it is immediately routed
- * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
- * to kvmppc_interrupt_hv, which handles the PR guest case.
+ * to KVM to handle.
*/
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-.macro KVMTEST name
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
- bne \name\()_kvm
-.endm
-
-.macro GEN_KVM name
- .balign IFETCH_ALIGN_BYTES
-\name\()_kvm:
-
- .if IKVM_SKIP
- cmpwi r10,KVM_GUEST_MODE_SKIP
- beq 89f
- .else
-BEGIN_FTR_SECTION
- ld r10,IAREA+EX_CFAR(r13)
- std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- .endif
-
- ld r10,IAREA+EX_CTR(r13)
- mtctr r10
-BEGIN_FTR_SECTION
- ld r10,IAREA+EX_PPR(r13)
- std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ld r11,IAREA+EX_R11(r13)
- ld r12,IAREA+EX_R12(r13)
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r9,32
- ld r9,IAREA+EX_R9(r13)
- ld r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
.if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- ori r12,r12,(IVEC + 0x2)
- FTR_SECTION_ELSE
- ori r12,r12,(IVEC)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif IHSRR
- ori r12,r12,(IVEC+ 0x2)
- .else
- ori r12,r12,(IVEC)
- .endif
- b kvmppc_interrupt
-
- .if IKVM_SKIP
-89: mtocrf 0x80,r9
- ld r10,IAREA+EX_CTR(r13)
- mtctr r10
- ld r9,IAREA+EX_R9(r13)
- ld r10,IAREA+EX_R10(r13)
- ld r11,IAREA+EX_R11(r13)
- ld r12,IAREA+EX_R12(r13)
- .if IHSRR_IF_HVMODE
- BEGIN_FTR_SECTION
- b kvmppc_skip_Hinterrupt
+ li r10,(IVEC + 0x2)
FTR_SECTION_ELSE
- b kvmppc_skip_interrupt
+ li r10,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
.elseif IHSRR
- b kvmppc_skip_Hinterrupt
+ li r10,(IVEC + 0x2)
.else
- b kvmppc_skip_interrupt
+ li r10,(IVEC)
.endif
- .endif
-.endm
-
-#else
-.macro KVMTEST name
-.endm
-.macro GEN_KVM name
-.endm
+ bne \handler
#endif
+.endm
/*
* This is the BOOK3S interrupt entry code macro.
@@ -461,7 +379,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
.endif
ld r10,PACAKMSR(r13) /* get MSR value for kernel */
@@ -484,7 +402,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
DEFINE_FIXED_SYMBOL(\name\()_common_virt)
\name\()_common_virt:
.if IKVM_VIRT
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
1:
.endif
.endif /* IVIRT */
@@ -498,7 +416,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
.endif
.endm
@@ -1000,8 +918,6 @@ EXC_COMMON_BEGIN(system_reset_common)
EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
- GEN_KVM system_reset
-
/**
* Interrupt 0x200 - Machine Check Interrupt (MCE).
@@ -1070,7 +986,6 @@ INT_DEFINE_BEGIN(machine_check)
ISET_RI=0
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(machine_check)
@@ -1166,7 +1081,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
/*
* Check if we are coming from guest. If yes, then run the normal
* exception handler which will take the
- * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * machine_check_kvm->kvm_interrupt branch to deliver the MC event
* to guest.
*/
lbz r11,HSTATE_IN_GUEST(r13)
@@ -1236,8 +1151,6 @@ EXC_COMMON_BEGIN(machine_check_common)
bl machine_check_exception
b interrupt_return
- GEN_KVM machine_check
-
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -1342,7 +1255,6 @@ INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access)
@@ -1373,8 +1285,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
REST_NVGPRS(r1)
b interrupt_return
- GEN_KVM data_access
-
/**
* Interrupt 0x380 - Data Segment Interrupt (DSLB).
@@ -1396,7 +1306,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IDAR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access_slb)
@@ -1425,8 +1334,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
bl do_bad_slb_fault
b interrupt_return
- GEN_KVM data_access_slb
-
/**
* Interrupt 0x400 - Instruction Storage Interrupt (ISI).
@@ -1463,8 +1370,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b interrupt_return
- GEN_KVM instruction_access
-
/**
* Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
@@ -1509,8 +1414,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
bl do_bad_slb_fault
b interrupt_return
- GEN_KVM instruction_access_slb
-
/**
* Interrupt 0x500 - External Interrupt.
@@ -1555,8 +1458,6 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
bl do_IRQ
b interrupt_return
- GEN_KVM hardware_interrupt
-
/**
* Interrupt 0x600 - Alignment Interrupt
@@ -1584,8 +1485,6 @@ EXC_COMMON_BEGIN(alignment_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM alignment
-
/**
* Interrupt 0x700 - Program Interrupt (program check).
@@ -1693,8 +1592,6 @@ EXC_COMMON_BEGIN(program_check_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM program_check
-
/*
* Interrupt 0x800 - Floating-Point Unavailable Interrupt.
@@ -1744,8 +1641,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
b interrupt_return
#endif
- GEN_KVM fp_unavailable
-
/**
* Interrupt 0x900 - Decrementer Interrupt.
@@ -1784,8 +1679,6 @@ EXC_COMMON_BEGIN(decrementer_common)
bl timer_interrupt
b interrupt_return
- GEN_KVM decrementer
-
/**
* Interrupt 0x980 - Hypervisor Decrementer Interrupt.
@@ -1831,8 +1724,6 @@ EXC_COMMON_BEGIN(hdecrementer_common)
ld r13,PACA_EXGEN+EX_R13(r13)
HRFI_TO_KERNEL
- GEN_KVM hdecrementer
-
/**
* Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
@@ -1872,8 +1763,6 @@ EXC_COMMON_BEGIN(doorbell_super_common)
#endif
b interrupt_return
- GEN_KVM doorbell_super
-
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
@@ -1923,7 +1812,7 @@ INT_DEFINE_END(system_call)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST system_call /* uses r10, branch to system_call_kvm */
+ KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
mfctr r9
#else
mr r9,r13
@@ -1979,14 +1868,16 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(system_call_kvm)
- /*
- * This is a hcall, so register convention is as above, with these
- * differences:
- * r13 = PACA
- * ctr = orig r13
- * orig r10 saved in PACA
- */
+TRAMP_REAL_BEGIN(kvm_hcall)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfcr r9
+ mfctr r10
+ std r10,PACA_EXGEN+EX_R13(r13)
+ li r10,0
+ std r10,PACA_EXGEN+EX_CFAR(r13)
+ std r10,PACA_EXGEN+EX_CTR(r13)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1994,31 +1885,24 @@ TRAMP_REAL_BEGIN(system_call_kvm)
*/
BEGIN_FTR_SECTION
mfspr r10,SPRN_PPR
- std r10,HSTATE_PPR(r13)
+ std r10,PACA_EXGEN+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
HMT_MEDIUM
- mfctr r10
- SET_SCRATCH0(r10)
- mfcr r10
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r10,32
- ori r12,r12,0xc00
+
#ifdef CONFIG_RELOCATABLE
/*
- * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+ * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
* outside the head section.
*/
- __LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall)
mtctr r10
- ld r10,PACA_EXGEN+EX_R10(r13)
bctr
#else
- ld r10,PACA_EXGEN+EX_R10(r13)
- b kvmppc_interrupt
+ b kvmppc_hcall
#endif
#endif
-
/**
* Interrupt 0xd00 - Trace Interrupt.
* This is a synchronous interrupt in response to instruction step or
@@ -2043,8 +1927,6 @@ EXC_COMMON_BEGIN(single_step_common)
bl single_step_exception
b interrupt_return
- GEN_KVM single_step
-
/**
* Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
@@ -2063,7 +1945,6 @@ INT_DEFINE_BEGIN(h_data_storage)
IHSRR=1
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(h_data_storage)
@@ -2084,8 +1965,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b interrupt_return
- GEN_KVM h_data_storage
-
/**
* Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
@@ -2111,8 +1990,6 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
bl unknown_exception
b interrupt_return
- GEN_KVM h_instr_storage
-
/**
* Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
@@ -2137,8 +2014,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM emulation_assist
-
/**
* Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
@@ -2210,16 +2085,12 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXCEPTION_RESTORE_REGS hsrr=1
GEN_INT_ENTRY hmi_exception, virt=0
- GEN_KVM hmi_exception_early
-
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b interrupt_return
- GEN_KVM hmi_exception
-
/**
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
@@ -2250,8 +2121,6 @@ EXC_COMMON_BEGIN(h_doorbell_common)
#endif
b interrupt_return
- GEN_KVM h_doorbell
-
/**
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
@@ -2278,8 +2147,6 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
bl do_IRQ
b interrupt_return
- GEN_KVM h_virt_irq
-
EXC_REAL_NONE(0xec0, 0x20)
EXC_VIRT_NONE(0x4ec0, 0x20)
@@ -2323,8 +2190,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
bl performance_monitor_exception
b interrupt_return
- GEN_KVM performance_monitor
-
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.
@@ -2374,8 +2239,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
bl altivec_unavailable_exception
b interrupt_return
- GEN_KVM altivec_unavailable
-
/**
* Interrupt 0xf40 - VSX Unavailable Interrupt.
@@ -2424,8 +2287,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl vsx_unavailable_exception
b interrupt_return
- GEN_KVM vsx_unavailable
-
/**
* Interrupt 0xf60 - Facility Unavailable Interrupt.
@@ -2454,8 +2315,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM facility_unavailable
-
/**
* Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
@@ -2484,8 +2343,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
b interrupt_return
- GEN_KVM h_facility_unavailable
-
EXC_REAL_NONE(0xfa0, 0x20)
EXC_VIRT_NONE(0x4fa0, 0x20)
@@ -2515,8 +2372,6 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
bl cbe_system_error_exception
b interrupt_return
- GEN_KVM cbe_system_error
-
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
@@ -2548,8 +2403,6 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
bl instruction_breakpoint_exception
b interrupt_return
- GEN_KVM instruction_breakpoint
-
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
@@ -2670,8 +2523,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
bl unknown_exception
b interrupt_return
- GEN_KVM denorm_exception
-
#ifdef CONFIG_CBE_RAS
INT_DEFINE_BEGIN(cbe_maintenance)
@@ -2689,8 +2540,6 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
bl cbe_maintenance_exception
b interrupt_return
- GEN_KVM cbe_maintenance
-
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
@@ -2721,8 +2570,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
#endif
b interrupt_return
- GEN_KVM altivec_assist
-
#ifdef CONFIG_CBE_RAS
INT_DEFINE_BEGIN(cbe_thermal)
@@ -2740,8 +2587,6 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
bl cbe_thermal_exception
b interrupt_return
- GEN_KVM cbe_thermal
-
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
@@ -2994,6 +2839,15 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
USE_TEXT_SECTION()
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvm_interrupt:
+ /*
+ * The conditional branch in KVMTEST can't reach all the way,
+ * make a stub.
+ */
+ b kvmppc_interrupt
+#endif
+
_GLOBAL(do_uaccess_flush)
UACCESS_FLUSH_FIXUP_SECTION
nop
@@ -3009,32 +2863,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
- RFI_TO_KERNEL
- b .
-
-kvmppc_skip_Hinterrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- HRFI_TO_KERNEL
- b .
-#endif
-
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
* with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 118f10d14af8..9c2f7b909911 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -432,16 +432,19 @@ device_initcall(stf_barrier_debugfs_init);
static void update_branch_cache_flush(void)
{
- u32 *site;
+ u32 *site, __maybe_unused *site2;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
site = &patch__call_kvm_flush_link_stack;
+ site2 = &patch__call_kvm_flush_link_stack_p9;
// This controls the branch from guest_exit_cont to kvm_flush_link_stack
if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP()));
} else {
// Could use HW flush, but that could also flush count cache
patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
}
#endif
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b67d93a609a2..da995c5fb97d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -508,16 +508,6 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
- unsigned long x;
-
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, irq_work_pending)));
- return x;
-}
-
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2bfeaa13befb..ab241317481c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -57,6 +57,7 @@ kvm-pr-y := \
book3s_32_mmu.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_64_entry.o \
tm.o
ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -86,6 +87,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_hmi.o \
+ book3s_hv_p9_entry.o \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 2b691f4d1f26..5e1e1cff0ee3 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -171,6 +171,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
/* might as well deliver this straight away */
@@ -1044,13 +1050,10 @@ static int kvmppc_book3s_init(void)
#ifdef CONFIG_KVM_XICS
#ifdef CONFIG_KVM_XIVE
if (xics_on_xive()) {
- kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
- if (kvmppc_xive_native_supported()) {
- kvmppc_xive_native_init_module();
+ if (kvmppc_xive_native_supported())
kvm_register_device_ops(&kvm_xive_native_ops,
KVM_DEV_TYPE_XIVE);
- }
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,12 +1063,6 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
-#ifdef CONFIG_KVM_XICS
- if (xics_on_xive()) {
- kvmppc_xive_exit_module();
- kvmppc_xive_native_exit_module();
- }
-#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
#endif
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
new file mode 100644
index 000000000000..983b8c18bc31
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/code-patching-asm.h>
+#include <asm/exception-64s.h>
+#include <asm/export.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/ultravisor-api.h>
+
+/*
+ * These are branched to from interrupt handlers in exception-64s.S which set
+ * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
+ */
+
+/*
+ * This is a hcall, so register convention is as
+ * Documentation/powerpc/papr_hcalls.rst.
+ *
+ * This may also be a syscall from PR-KVM userspace that is to be
+ * reflected to the PR guest kernel, so registers may be set up for
+ * a system call rather than hcall. We don't currently clobber
+ * anything here, but the 0xc00 handler has already clobbered CTR
+ * and CR0, so PR-KVM can not support a guest kernel that preserves
+ * those registers across its system calls.
+ *
+ * The state of registers is as kvmppc_interrupt, except CFAR is not
+ * saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
+ */
+.global kvmppc_hcall
+.balign IFETCH_ALIGN_BYTES
+kvmppc_hcall:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_hcall
+#endif
+ ld r10,PACA_EXGEN+EX_R13(r13)
+ SET_SCRATCH0(r10)
+ li r10,0xc00
+ /* Now we look like kvmppc_interrupt */
+ li r11,PACA_EXGEN
+ b .Lgot_save_area
+
+/*
+ * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
+ * call convention:
+ *
+ * guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
+ * guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
+ * guest R13 also saved in SCRATCH0
+ * R13 = PACA
+ * R11 = (H)SRR0
+ * R12 = (H)SRR1
+ * R9 = guest CR
+ * PPR is set to medium
+ *
+ * With the addition for KVM:
+ * R10 = trap vector
+ */
+.global kvmppc_interrupt
+.balign IFETCH_ALIGN_BYTES
+kvmppc_interrupt:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ std r10,HSTATE_SCRATCH0(r13)
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_interrupt
+ ld r10,HSTATE_SCRATCH0(r13)
+#endif
+ li r11,PACA_EXGEN
+ cmpdi r10,0x200
+ bgt+ .Lgot_save_area
+ li r11,PACA_EXMC
+ beq .Lgot_save_area
+ li r11,PACA_EXNMI
+.Lgot_save_area:
+ add r11,r11,r13
+BEGIN_FTR_SECTION
+ ld r12,EX_CFAR(r11)
+ std r12,HSTATE_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r12,EX_CTR(r11)
+ mtctr r12
+BEGIN_FTR_SECTION
+ ld r12,EX_PPR(r11)
+ std r12,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r12,EX_R12(r11)
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r9,32
+ or r12,r12,r10
+ ld r9,EX_R9(r11)
+ ld r10,EX_R10(r11)
+ ld r11,EX_R11(r11)
+
+ /*
+ * Hcalls and other interrupts come here after normalising register
+ * contents and save locations:
+ *
+ * R12 = (guest CR << 32) | interrupt vector
+ * R13 = PACA
+ * guest R12 saved in shadow HSTATE_SCRATCH0
+ * guest R13 saved in SPRN_SCRATCH0
+ */
+ std r9,HSTATE_SCRATCH2(r13)
+ lbz r9,HSTATE_IN_GUEST(r13)
+ cmpwi r9,KVM_GUEST_MODE_SKIP
+ beq- .Lmaybe_skip
+.Lno_skip:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ cmpwi r9,KVM_GUEST_MODE_GUEST
+ beq kvmppc_interrupt_pr
+#endif
+ b kvmppc_interrupt_hv
+#else
+ b kvmppc_interrupt_pr
+#endif
+
+/*
+ * "Skip" interrupts are part of a trick KVM uses a with hash guests to load
+ * the faulting instruction in guest memory from the the hypervisor without
+ * walking page tables.
+ *
+ * When the guest takes a fault that requires the hypervisor to load the
+ * instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
+ * and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
+ * MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
+ * but loads and stores will access the guest context. This is used to load
+ * the faulting instruction using the faulting guest effective address.
+ *
+ * However the guest context may not be able to translate, or it may cause a
+ * machine check or other issue, which results in a fault in the host
+ * (even with KVM-HV).
+ *
+ * These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
+ * are (or are likely) caused by that load, the instruction is skipped by
+ * just returning with the PC advanced +4, where it is noticed the load did
+ * not execute and it goes to the slow path which walks the page tables to
+ * read guest memory.
+ */
+.Lmaybe_skip:
+ cmpwi r12,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_STORAGE
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_SEGMENT
+ beq 1f
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* HSRR interrupts get 2 added to interrupt number */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
+ beq 2f
+#endif
+ b .Lno_skip
+1: mfspr r9,SPRN_SRR0
+ addi r9,r9,4
+ mtspr SPRN_SRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ RFI_TO_KERNEL
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+2: mfspr r9,SPRN_HSRR0
+ addi r9,r9,4
+ mtspr SPRN_HSRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ HRFI_TO_KERNEL
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+/* Stack frame offsets for kvmppc_p9_enter_guest */
+#define SFS (144 + STACK_FRAME_MIN_SIZE)
+#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */
+
+/*
+ * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
+ *
+ * Enter the guest on a ISAv3.0 or later system.
+ */
+.balign IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+ stdu r1,-SFS(r1)
+
+ std r1,HSTATE_HOST_R1(r13)
+
+ mfcr r4
+ stw r4,SFS+8(r1)
+
+ reg = 14
+ .rept 18
+ std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_LR(r3)
+ mtlr r4
+ ld r4,VCPU_CTR(r3)
+ mtctr r4
+ ld r4,VCPU_XER(r3)
+ mtspr SPRN_XER,r4
+
+ ld r1,VCPU_CR(r3)
+
+BEGIN_FTR_SECTION
+ ld r4,VCPU_CFAR(r3)
+ mtspr SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r4,VCPU_PPR(r3)
+ mtspr SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ reg = 4
+ .rept 28
+ ld reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_KVM(r3)
+ lbz r4,KVM_SECURE_GUEST(r4)
+ cmpdi r4,0
+ ld r4,VCPU_GPR(R4)(r3)
+ bne .Lret_to_ultra
+
+ mtcr r1
+
+ ld r0,VCPU_GPR(R0)(r3)
+ ld r1,VCPU_GPR(R1)(r3)
+ ld r2,VCPU_GPR(R2)(r3)
+ ld r3,VCPU_GPR(R3)(r3)
+
+ HRFI_TO_GUEST
+ b .
+
+ /*
+ * Use UV_RETURN ultracall to return control back to the Ultravisor
+ * after processing an hypercall or interrupt that was forwarded
+ * (a.k.a. reflected) to the Hypervisor.
+ *
+ * All registers have already been reloaded except the ucall requires:
+ * R0 = hcall result
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+ * R3 = UV_RETURN
+ */
+.Lret_to_ultra:
+ mtcr r1
+ ld r1,VCPU_GPR(R1)(r3)
+
+ ld r0,VCPU_GPR(R3)(r3)
+ mfspr r2,SPRN_SRR1
+ LOAD_REG_IMMEDIATE(r3, UV_RETURN)
+ sc 2
+
+/*
+ * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
+ * above if the interrupt was taken for a guest that was entered via
+ * kvmppc_p9_enter_guest().
+ *
+ * The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
+ * and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
+ * establishes the host stack and registers to return from the
+ * kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
+ * (SPRs and FP, VEC, etc).
+ */
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+ li r10,0xc00
+ std r10,HSTATE_SCRATCH0(r13)
+
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+ /*
+ * If set to KVM_GUEST_MODE_HV_P9 but we're still in the
+ * hypervisor, that means we can't return from the entry stack.
+ */
+ rldicl. r10,r12,64-MSR_HV_LG,63
+ bne- kvmppc_p9_bad_interrupt
+
+ std r1,HSTATE_SCRATCH1(r13)
+ std r3,HSTATE_SCRATCH2(r13)
+ ld r1,HSTATE_HOST_R1(r13)
+ ld r3,HSTATE_KVM_VCPU(r13)
+
+ std r9,VCPU_CR(r3)
+
+1:
+ std r11,VCPU_PC(r3)
+ std r12,VCPU_MSR(r3)
+
+ reg = 14
+ .rept 18
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ /* r1, r3, r9-r13 are saved to vcpu by C code */
+ std r0,VCPU_GPR(R0)(r3)
+ std r2,VCPU_GPR(R2)(r3)
+ reg = 4
+ .rept 5
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r2,PACATOC(r13)
+
+ mflr r4
+ std r4,VCPU_LR(r3)
+ mfspr r4,SPRN_XER
+ std r4,VCPU_XER(r3)
+
+ reg = 14
+ .rept 18
+ ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ lwz r4,SFS+8(r1)
+ mtcr r4
+
+ /*
+ * Flush the link stack here, before executing the first blr on the
+ * way out of the guest.
+ *
+ * The link stack won't match coming out of the guest anyway so the
+ * only cost is the flush itself. The call clobbers r0.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack_p9
+
+ addi r1,r1,SFS
+ ld r0,PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+/*
+ * Took an interrupt somewhere right before HRFID to guest, so registers are
+ * in a bad way. Return things hopefully enough to run host virtual code and
+ * run the Linux interrupt handler (SRESET or MCE) to print something useful.
+ *
+ * We could be really clever and save all host registers in known locations
+ * before setting HSTATE_IN_GUEST, then restoring them all here, and setting
+ * return address to a fixup that sets them up again. But that's a lot of
+ * effort for a small bit of code. Lots of other things to do first.
+ */
+kvmppc_p9_bad_interrupt:
+BEGIN_MMU_FTR_SECTION
+ /*
+ * Hash host doesn't try to recover MMU (requires host SLB reload)
+ */
+ b .
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+ /*
+ * Clean up guest registers to give host a chance to run.
+ */
+ li r10,0
+ mtspr SPRN_AMR,r10
+ mtspr SPRN_IAMR,r10
+ mtspr SPRN_CIABR,r10
+ mtspr SPRN_DAWRX0,r10
+BEGIN_FTR_SECTION
+ mtspr SPRN_DAWRX1,r10
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
+ mtspr SPRN_PID,r10
+
+ /*
+ * Switch to host MMU mode
+ */
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ lwz r10, KVM_HOST_LPID(r10)
+ mtspr SPRN_LPID,r10
+
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ ld r10, KVM_HOST_LPCR(r10)
+ mtspr SPRN_LPCR,r10
+
+ /*
+ * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
+ * MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
+ */
+ li r10,KVM_GUEST_MODE_NONE
+ stb r10,HSTATE_IN_GUEST(r13)
+ li r10,MSR_RI
+ andc r12,r12,r10
+
+ /*
+ * Go back to interrupt handler. MCE and SRESET have their specific
+ * PACA save area so they should be used directly. They set up their
+ * own stack. The other handlers all use EXGEN. They will use the
+ * guest r1 if it looks like a kernel stack, so just load the
+ * emergency stack and go to program check for all other interrupts.
+ */
+ ld r10,HSTATE_SCRATCH0(r13)
+ cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_common
+
+ cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
+ beq system_reset_common
+
+ b .
+#endif
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 083a4e037718..dc6591548f0c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -391,10 +391,6 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -489,10 +485,6 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
/*
* used to check for invalidations in progress
*/
@@ -602,10 +594,6 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
long i, ret;
struct kvmppc_spapr_tce_iommu_table *stit;
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 13728495ac66..f4dc4f0c34b5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -103,13 +103,9 @@ static int target_smt_mode;
module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
-static bool indep_threads_mode = true;
-module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
-
static bool one_vm_per_core;
module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
#ifdef CONFIG_KVM_XICS
static const struct kernel_param_ops module_param_ops = {
@@ -134,9 +130,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
return kvm->arch.nested_enable && kvm_is_radix(kvm);
}
-/* If set, the threads on each CPU core have to be in the same MMU mode */
-static bool no_mixing_hpt_and_radix __read_mostly;
-
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -807,7 +800,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
* KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
* Keep this in synch with kvmppc_filter_guest_lpcr_hv.
*/
- if (mflags != 0 && mflags != 3)
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
default:
@@ -899,6 +893,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* H_SUCCESS if the source vcore wasn't idle (e.g. if it may
* have useful work to do and should not confer) so we don't
* recheck that here.
+ *
+ * In the case of the P9 single vcpu per vcore case, the real
+ * mode handler is not called but no other threads are in the
+ * source vcore.
*/
spin_lock(&vcore->lock);
@@ -926,6 +924,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
int yield_count;
@@ -937,11 +936,57 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
+ case H_REMOVE:
+ ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_ENTER:
+ ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_READ:
+ ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_MOD:
+ ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_REF:
+ ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PROTECT:
+ ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_BULK_REMOVE:
+ ret = kvmppc_h_bulk_remove(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+
case H_CEDE:
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -955,7 +1000,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
break;
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -971,12 +1016,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
- if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ if (list_empty(&kvm->arch.rtas_tokens))
return RESUME_HOST;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
+ idx = srcu_read_lock(&kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ srcu_read_unlock(&kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
@@ -1063,12 +1108,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SET_PARTITION_TABLE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_set_partition_table(vcpu);
break;
case H_ENTER_NESTED:
ret = H_FUNCTION;
- if (!nesting_enabled(vcpu->kvm))
+ if (!nesting_enabled(kvm))
break;
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
@@ -1083,12 +1128,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
case H_TLB_INVALIDATE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
case H_COPY_TOFROM_GUEST:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
case H_PAGE_INIT:
@@ -1099,7 +1144,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_IN:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ ret = kvmppc_h_svm_page_in(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1107,7 +1152,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_OUT:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ ret = kvmppc_h_svm_page_out(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1115,12 +1160,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_INIT_START:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ ret = kvmppc_h_svm_init_start(kvm);
break;
case H_SVM_INIT_DONE:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ ret = kvmppc_h_svm_init_done(kvm);
break;
case H_SVM_INIT_ABORT:
/*
@@ -1130,24 +1175,26 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
* Instead the kvm->arch.secure_guest flag is checked inside
* kvmppc_h_svm_init_abort().
*/
- ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ ret = kvmppc_h_svm_init_abort(kvm);
break;
default:
return RESUME_HOST;
}
+ WARN_ON_ONCE(ret == H_TOO_HARD);
kvmppc_set_gpr(vcpu, 3, ret);
vcpu->arch.hcall_needed = 0;
return RESUME_GUEST;
}
/*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
@@ -1400,13 +1447,39 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
{
- /* hcall - punt to userspace */
int i;
- /* hypercall with MSR_PR has already been handled in rmode,
- * and never reaches here.
- */
+ if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1. This can only be
+ * reached by the P9 path because the old path
+ * handles this case in realmode hcall handlers.
+ */
+ if (!kvmhv_vcpu_is_radix(vcpu)) {
+ /*
+ * A guest could be running PR KVM, so this
+ * may be a PR KVM hcall. It must be reflected
+ * to the guest kernel as a sc interrupt.
+ */
+ kvmppc_core_queue_syscall(vcpu);
+ } else {
+ /*
+ * Radix guests can not run PR KVM or nested HV
+ * hash guests which might run PR KVM, so this
+ * is always a privilege fault. Send a program
+ * check to guest kernel.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ }
+ r = RESUME_GUEST;
+ break;
+ }
+ /*
+ * hcall - gather args and set exit_reason. This will next be
+ * handled by kvmppc_pseries_do_hcall which may be able to deal
+ * with it and resume guest, or may punt to userspace.
+ */
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -1419,20 +1492,102 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* We get these next two if the guest accesses a page which it thinks
* it has mapped but which is not actually present, either because
* it is for an emulated I/O device or because the corresonding
- * host page has been paged out. Any other HDSI/HISI interrupts
- * have been handled already.
+ * host page has been paged out.
+ *
+ * Any other HDSI/HISI interrupts have been handled already for P7/8
+ * guests. For POWER9 hash guests not using rmhandlers, basic hash
+ * fault handling is done here.
*/
- case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- r = RESUME_PAGE_FAULT;
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+ unsigned long vsid;
+ long err;
+
+ if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
+ r = RESUME_GUEST; /* Just retry if it's the canary */
+ break;
+ }
+
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_DR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, true);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1 || err == -2) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, err);
+ r = RESUME_GUEST;
+ }
break;
- case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ }
+ case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+ unsigned long vsid;
+ long err;
+
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
DSISR_SRR1_MATCH_64S;
- if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
- vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
- r = RESUME_PAGE_FAULT;
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+ kvmppc_core_queue_inst_storage(vcpu,
+ vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_IR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, false);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_inst_storage(vcpu, err);
+ r = RESUME_GUEST;
+ }
break;
+ }
+
/*
* This occurs if the guest executes an illegal instruction.
* If the guest debug is disabled, generate a program interrupt
@@ -1654,6 +1809,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
lpcr &= ~LPCR_AIL;
if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+ /*
+ * On some POWER9s we force AIL off for radix guests to prevent
+ * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
+ * guest, which can result in Q0 translations with LPID=0 PID=PIDR to
+ * be cached, which the host TLB management does not expect.
+ */
+ if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ lpcr &= ~LPCR_AIL;
/*
* On POWER9, allow userspace to enable large decrementer for the
@@ -2233,7 +2396,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*/
static int threads_per_vcore(struct kvm *kvm)
{
- if (kvm->arch.threads_indep)
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return 1;
return threads_per_subcore;
}
@@ -2967,9 +3130,6 @@ static void prepare_threads(struct kvmppc_vcore *vc)
for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
- else if (no_mixing_hpt_and_radix &&
- kvm_is_radix(vc->kvm) != radix_enabled())
- vcpu->arch.ret = -EINVAL;
else if (vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending)
@@ -3176,6 +3336,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int trap;
bool is_power8;
+ if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+ return;
+
/*
* Remove from the list any threads that have a signal pending
* or need a VPA update done
@@ -3203,9 +3366,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
- * On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host machine where the
- * CPU threads may not be in different MMU modes.
*/
if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
@@ -3230,18 +3390,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
collect_piggybacks(&core_info, target_threads);
/*
- * On radix, arrange for TLB flushing if necessary.
- * This has to be done before disabling interrupts since
- * it uses smp_call_function().
- */
- pcpu = smp_processor_id();
- if (kvm_is_radix(vc->kvm)) {
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, core_info.vc[sub])
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
- }
-
- /*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
* and return without going into the guest(s).
@@ -3273,8 +3421,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cmd_bit = stat_bit = 0;
split = core_info.n_subcores;
sip = NULL;
- is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
- && !cpu_has_feature(CPU_FTR_ARCH_300);
+ is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
if (split > 1) {
sip = &split_info;
@@ -3478,184 +3625,113 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
-/*
- * Load up hypervisor-mode registers on P9.
- */
-static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
- unsigned long lpcr)
+static void load_spr_state(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- s64 hdec;
- u64 tb, purr, spurr;
- int trap;
- unsigned long host_hfscr = mfspr(SPRN_HFSCR);
- unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
- unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
- unsigned long host_psscr = mfspr(SPRN_PSSCR);
- unsigned long host_pidr = mfspr(SPRN_PID);
- unsigned long host_dawr1 = 0;
- unsigned long host_dawrx1 = 0;
-
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- host_dawr1 = mfspr(SPRN_DAWR1);
- host_dawrx1 = mfspr(SPRN_DAWRX1);
- }
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ mtspr(SPRN_WORT, vcpu->arch.wort);
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
/*
- * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
- * so set HDICE before writing HDEC.
+ * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+ * clear (or hstate set appropriately to catch those registers
+ * being clobbered if we take a MCE or SRESET), so those are done
+ * later.
*/
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
- isync();
- hdec = time_limit - mftb();
- if (hdec < 0) {
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
- isync();
- return BOOK3S_INTERRUPT_HV_DECREMENTER;
- }
- mtspr(SPRN_HDEC, hdec);
-
- if (vc->tb_offset) {
- u64 new_tb = mftb() + vc->tb_offset;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = vc->tb_offset;
- }
-
- if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
- mtspr(SPRN_DPDES, vc->dpdes);
- mtspr(SPRN_VTB, vc->vtb);
-
- local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
- local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, vcpu->arch.purr);
- mtspr(SPRN_SPURR, vcpu->arch.spurr);
-
- if (dawr_enabled()) {
- mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
- mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
- mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
- }
- }
- mtspr(SPRN_CIABR, vcpu->arch.ciabr);
- mtspr(SPRN_IC, vcpu->arch.ic);
- mtspr(SPRN_PID, vcpu->arch.pid);
-
- mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
-
- mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
-
- mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
- mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
- mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
- mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
- mtspr(SPRN_AMOR, ~0UL);
-
- mtspr(SPRN_LPCR, lpcr);
- isync();
-
- kvmppc_xive_push_vcpu(vcpu);
-
- mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
- mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
- trap = __kvmhv_vcpu_entry_p9(vcpu);
-
- /* Advance host PURR/SPURR by the amount used by guest */
- purr = mfspr(SPRN_PURR);
- spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
- purr - vcpu->arch.purr);
- mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
- spurr - vcpu->arch.spurr);
- vcpu->arch.purr = purr;
- vcpu->arch.spurr = spurr;
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+}
- vcpu->arch.ic = mfspr(SPRN_IC);
- vcpu->arch.pid = mfspr(SPRN_PID);
- vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
- vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
- vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
- vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ vcpu->arch.wort = mfspr(SPRN_WORT);
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+}
- /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
- mtspr(SPRN_PSSCR, host_psscr |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, host_hfscr);
- mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR0, host_dawr0);
- mtspr(SPRN_DAWRX0, host_dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, host_dawr1);
- mtspr(SPRN_DAWRX1, host_dawrx1);
- }
- mtspr(SPRN_PID, host_pidr);
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+ unsigned long dscr;
+ unsigned long tidr;
+ unsigned long iamr;
+ unsigned long amr;
+ unsigned long fscr;
+};
- /*
- * Since this is radix, do a eieio; tlbsync; ptesync sequence in
- * case we interrupted the guest between a tlbie and a ptesync.
- */
- asm volatile("eieio; tlbsync; ptesync");
+static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+ host_os_sprs->dscr = mfspr(SPRN_DSCR);
+ host_os_sprs->tidr = mfspr(SPRN_TIDR);
+ host_os_sprs->iamr = mfspr(SPRN_IAMR);
+ host_os_sprs->amr = mfspr(SPRN_AMR);
+ host_os_sprs->fscr = mfspr(SPRN_FSCR);
+}
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- asm volatile(PPC_CP_ABORT);
+/* vcpu guest regs must already be saved */
+static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ mtspr(SPRN_PSPB, 0);
+ mtspr(SPRN_WORT, 0);
+ mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
- isync();
+ mtspr(SPRN_DSCR, host_os_sprs->dscr);
+ mtspr(SPRN_TIDR, host_os_sprs->tidr);
+ mtspr(SPRN_IAMR, host_os_sprs->iamr);
- vc->dpdes = mfspr(SPRN_DPDES);
- vc->vtb = mfspr(SPRN_VTB);
- mtspr(SPRN_DPDES, 0);
- if (vc->pcr)
- mtspr(SPRN_PCR, PCR_MASK);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_os_sprs->amr);
- if (vc->tb_offset_applied) {
- u64 new_tb = mftb() - vc->tb_offset_applied;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = 0;
- }
+ if (host_os_sprs->fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, host_os_sprs->fscr);
- mtspr(SPRN_HDEC, 0x7fffffff);
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+ /* Save guest CTRL register, set runlatch to 1 */
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 1);
+}
- return trap;
+static inline bool hcall_is_xics(unsigned long req)
+{
+ return req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
}
/*
- * Virtual-mode guest entry for POWER9 and later when the host and
- * guest are both using the radix MMU. The LPIDR has already been set.
+ * Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- unsigned long host_dscr = mfspr(SPRN_DSCR);
- unsigned long host_tidr = mfspr(SPRN_TIDR);
- unsigned long host_iamr = mfspr(SPRN_IAMR);
- unsigned long host_amr = mfspr(SPRN_AMR);
- unsigned long host_fscr = mfspr(SPRN_FSCR);
+ struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb;
int trap, save_pmu;
+ WARN_ON_ONCE(vcpu->arch.ceded);
+
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 0)
@@ -3664,7 +3740,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (local_paca->kvm_hstate.dec_expires < time_limit)
time_limit = local_paca->kvm_hstate.dec_expires;
- vcpu->arch.ceded = 0;
+ save_p9_host_os_sprs(&host_os_sprs);
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
@@ -3693,24 +3769,20 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
- mtspr(SPRN_DSCR, vcpu->arch.dscr);
- mtspr(SPRN_IAMR, vcpu->arch.iamr);
- mtspr(SPRN_PSPB, vcpu->arch.pspb);
- mtspr(SPRN_FSCR, vcpu->arch.fscr);
- mtspr(SPRN_TAR, vcpu->arch.tar);
- mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
- mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
- mtspr(SPRN_BESCR, vcpu->arch.bescr);
- mtspr(SPRN_WORT, vcpu->arch.wort);
- mtspr(SPRN_TIDR, vcpu->arch.tid);
- mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
- mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
- mtspr(SPRN_AMR, vcpu->arch.amr);
- mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+ load_spr_state(vcpu);
+ /*
+ * When setting DEC, we must always deal with irq_work_raise via NMI vs
+ * setting DEC. The problem occurs right as we switch into guest mode
+ * if a NMI hits and sets pending work and sets DEC, then that will
+ * apply to the guest and not bring us back to the host.
+ *
+ * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
+ * example) and set HDEC to 1? That wouldn't solve the nested hv
+ * case which needs to abort the hcall or zero the time limit.
+ *
+ * XXX: Another day's problem.
+ */
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
@@ -3718,7 +3790,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
- * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ * this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
@@ -3738,6 +3810,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
hvregs.vcpu_token = vcpu->vcpu_id;
}
hvregs.hdec_expiry = time_limit;
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
@@ -3750,15 +3824,41 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
- kvmppc_nested_cede(vcpu);
+ kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
- trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ kvmppc_xive_push_vcpu(vcpu);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /* H_CEDE has to be handled now, not later */
+ if (req == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+
+ /* XICS hcalls must be handled before xive is pulled */
+ } else if (hcall_is_xics(req)) {
+ int ret;
+
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ if (ret != H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ }
+ kvmppc_xive_pull_vcpu(vcpu);
+
+ if (kvm_is_radix(vcpu->kvm))
+ vcpu->arch.slb_max = 0;
}
- vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
@@ -3766,36 +3866,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
- /* Save guest CTRL register, set runlatch to 1 */
- vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
- vcpu->arch.iamr = mfspr(SPRN_IAMR);
- vcpu->arch.pspb = mfspr(SPRN_PSPB);
- vcpu->arch.fscr = mfspr(SPRN_FSCR);
- vcpu->arch.tar = mfspr(SPRN_TAR);
- vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
- vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
- vcpu->arch.bescr = mfspr(SPRN_BESCR);
- vcpu->arch.wort = mfspr(SPRN_WORT);
- vcpu->arch.tid = mfspr(SPRN_TIDR);
- vcpu->arch.amr = mfspr(SPRN_AMR);
- vcpu->arch.uamor = mfspr(SPRN_UAMOR);
- vcpu->arch.dscr = mfspr(SPRN_DSCR);
-
- mtspr(SPRN_PSPB, 0);
- mtspr(SPRN_WORT, 0);
- mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_DSCR, host_dscr);
- mtspr(SPRN_TIDR, host_tidr);
- mtspr(SPRN_IAMR, host_iamr);
+ store_spr_state(vcpu);
- if (host_amr != vcpu->arch.amr)
- mtspr(SPRN_AMR, host_amr);
-
- if (host_fscr != vcpu->arch.fscr)
- mtspr(SPRN_FSCR, host_fscr);
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
@@ -3825,6 +3899,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vc->in_guest = 0;
mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
@@ -4014,7 +4091,6 @@ out:
/*
* This never fails for a radix guest, as none of the operations it does
* for a radix guest can fail or have a way to report failure.
- * kvmhv_run_single_vcpu() relies on this fact.
*/
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
@@ -4170,7 +4246,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
{
struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
- int srcu_idx, lpid;
+ int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4193,8 +4269,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->runner = vcpu;
/* See if the MMU is ready to go */
- if (!kvm->arch.mmu_ready)
- kvmhv_setup_mmu(vcpu);
+ if (!kvm->arch.mmu_ready) {
+ r = kvmhv_setup_mmu(vcpu);
+ if (r) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ return r;
+ }
+ }
if (need_resched())
cond_resched();
@@ -4207,7 +4290,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ if (kvm_is_radix(kvm))
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
@@ -4244,13 +4328,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
- mtspr(SPRN_LPID, lpid);
- isync();
- kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
- }
-
guest_enter_irqoff();
srcu_idx = srcu_read_lock(&kvm->srcu);
@@ -4269,11 +4346,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
srcu_read_unlock(&kvm->srcu, srcu_idx);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- mtspr(SPRN_LPID, kvm->arch.host_lpid);
- isync();
- }
-
set_irq_happened(trap);
kvmppc_set_host_core(pcpu);
@@ -4419,19 +4491,23 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
- /*
- * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
- * path, which also handles hash and dependent threads mode.
- */
- if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
- !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
r = kvmppc_run_vcpu(vcpu);
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * These should have been caught reflected
+ * into the guest by now. Final sanity check:
+ * don't allow userspace to execute hcalls in
+ * the hypervisor.
+ */
+ r = RESUME_GUEST;
+ continue;
+ }
trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
trace_kvm_hcall_exit(vcpu, r);
@@ -5038,18 +5114,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
- * On POWER9, we only need to do this if the "indep_threads_mode"
- * module parameter has been set to N.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
- pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
- kvm->arch.threads_indep = true;
- } else {
- kvm->arch.threads_indep = indep_threads_mode;
- }
- }
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
@@ -5090,7 +5156,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
debugfs_remove_recursive(kvm->arch.debugfs_dir);
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
@@ -5511,7 +5577,9 @@ static int kvmhv_enable_nested(struct kvm *kvm)
{
if (!nested)
return -EPERM;
- if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+ if (!radix_enabled())
return -ENODEV;
/* kvm == NULL means the caller is testing if the capability exists */
@@ -5674,11 +5742,25 @@ static int kvmhv_enable_dawr1(struct kvm *kvm)
static bool kvmppc_hash_v3_possible(void)
{
- if (radix_enabled() && no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
return false;
- return cpu_has_feature(CPU_FTR_ARCH_300) &&
- cpu_has_feature(CPU_FTR_HVMODE);
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return false;
+
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ if (radix_enabled()) {
+ unsigned int pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) == PVR_POWER9 &&
+ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+ return false;
+ }
+
+ return true;
}
static struct kvmppc_ops kvm_ops_hv = {
@@ -5822,18 +5904,6 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
- /*
- * POWER9 chips before version 2.02 can't have some threads in
- * HPT mode and some in radix mode on the same core.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- unsigned int pvr = mfspr(SPRN_PVR);
- if ((pvr >> 16) == PVR_POWER9 &&
- (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
- ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
- no_mixing_hpt_and_radix = true;
- }
-
r = kvmppc_uvmem_init();
if (r < 0)
pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7a0e33a9c980..259492bb4153 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -35,21 +35,6 @@
#include "book3s_xive.h"
/*
- * The XIVE module will populate these when it loads
- */
-unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
- unsigned long mfrr);
-int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
-EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
-
-/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
*/
@@ -196,16 +181,9 @@ int kvmppc_hwrng_present(void)
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
-long kvmppc_h_random(struct kvm_vcpu *vcpu)
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- int r;
-
- /* Only need to do the expensive mfmsr() on radix */
- if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
- r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
- else
- r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
- if (r)
+ if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
@@ -221,15 +199,6 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
- /* For a nested hypervisor, use the XICS via hcall */
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
- IPI_PRIORITY);
- return;
- }
-
/* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
@@ -442,19 +411,12 @@ static long kvmppc_read_one_intr(bool *again)
return 1;
/* Now read the interrupt from the ICP */
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
- xirr = cpu_to_be32(retbuf[0]);
- } else {
- xics_phys = local_paca->kvm_hstate.xics_phys;
- rc = 0;
- if (!xics_phys)
- rc = opal_int_get_xirr(&xirr, false);
- else
- xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
- }
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ rc = 0;
+ if (!xics_phys)
+ rc = opal_int_get_xirr(&xirr, false);
+ else
+ xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
if (rc < 0)
return 1;
@@ -483,13 +445,7 @@ static long kvmppc_read_one_intr(bool *again)
*/
if (xisr == XICS_IPI) {
rc = 0;
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf,
- hard_smp_processor_id(), 0xff);
- plpar_hcall_raw(H_EOI, retbuf, h_xirr);
- } else if (xics_phys) {
+ if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else {
@@ -515,13 +471,7 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf,
- hard_smp_processor_id(),
- IPI_PRIORITY);
- } else if (xics_phys)
+ if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR);
else
@@ -541,22 +491,13 @@ static long kvmppc_read_one_intr(bool *again)
}
#ifdef CONFIG_KVM_XICS
-static inline bool is_rm(void)
-{
- return !(mfmsr() & MSR_DR);
-}
-
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_xirr(vcpu);
- if (unlikely(!__xive_vm_h_xirr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_xirr(vcpu);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_xirr(vcpu);
+ else
return xics_rm_h_xirr(vcpu);
}
@@ -565,13 +506,9 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
vcpu->arch.regs.gpr[5] = get_tb();
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_xirr(vcpu);
- if (unlikely(!__xive_vm_h_xirr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_xirr(vcpu);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_xirr(vcpu);
+ else
return xics_rm_h_xirr(vcpu);
}
@@ -579,13 +516,9 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_ipoll(vcpu, server);
- if (unlikely(!__xive_vm_h_ipoll))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_ipoll(vcpu, server);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_ipoll(vcpu, server);
+ else
return H_TOO_HARD;
}
@@ -594,13 +527,9 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_ipi(vcpu, server, mfrr);
- if (unlikely(!__xive_vm_h_ipi))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_ipi(vcpu, server, mfrr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_ipi(vcpu, server, mfrr);
+ else
return xics_rm_h_ipi(vcpu, server, mfrr);
}
@@ -608,13 +537,9 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_cppr(vcpu, cppr);
- if (unlikely(!__xive_vm_h_cppr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_cppr(vcpu, cppr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_cppr(vcpu, cppr);
+ else
return xics_rm_h_cppr(vcpu, cppr);
}
@@ -622,13 +547,9 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_eoi(vcpu, xirr);
- if (unlikely(!__xive_vm_h_eoi))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_eoi(vcpu, xirr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_eoi(vcpu, xirr);
+ else
return xics_rm_h_eoi(vcpu, xirr);
}
#endif /* CONFIG_KVM_XICS */
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 327417d79eac..4444f83cb133 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/*
* Put whatever is in the decrementer into the
* hypervisor decrementer.
- * Because of a hardware deviation in P8 and P9,
+ * Because of a hardware deviation in P8,
* we need to set LPCR[HDICE] before writing HDEC.
*/
ld r5, HSTATE_KVM_VCORE(r13)
@@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ori r8, r9, LPCR_HDICE
mtspr SPRN_LPCR, r8
isync
- andis. r0, r9, LPCR_LD@h
mfspr r8,SPRN_DEC
mftb r7
-BEGIN_FTR_SECTION
- /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
- bne 32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
-32: mtspr SPRN_HDEC,r8
+ mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
new file mode 100644
index 000000000000..83f592eadcd2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 tb = mftb() - vc->tb_offset_applied;
+
+ vcpu->arch.cur_activity = next;
+ vcpu->arch.cur_tb_start = tb;
+}
+
+static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmhv_tb_accumulator *curr;
+ u64 tb = mftb() - vc->tb_offset_applied;
+ u64 prev_tb;
+ u64 delta;
+ u64 seq;
+
+ curr = vcpu->arch.cur_activity;
+ vcpu->arch.cur_activity = next;
+ prev_tb = vcpu->arch.cur_tb_start;
+ vcpu->arch.cur_tb_start = tb;
+
+ if (!curr)
+ return;
+
+ delta = tb - prev_tb;
+
+ seq = curr->seqcount;
+ curr->seqcount = seq + 1;
+ smp_wmb();
+ curr->tb_total += delta;
+ if (seq == 0 || delta < curr->tb_min)
+ curr->tb_min = delta;
+ if (delta > curr->tb_max)
+ curr->tb_max = delta;
+ smp_wmb();
+ curr->seqcount = seq + 2;
+}
+
+#define start_timing(vcpu, next) __start_timing(vcpu, next)
+#define end_timing(vcpu) __start_timing(vcpu, NULL)
+#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
+#else
+#define start_timing(vcpu, next) do {} while (0)
+#define end_timing(vcpu) do {} while (0)
+#define accumulate_time(vcpu, next) do {} while (0)
+#endif
+
+static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+{
+ asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
+ asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
+}
+
+static inline void mtslb(u64 slbee, u64 slbev)
+{
+ asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
+static inline void clear_slb_entry(unsigned int idx)
+{
+ mtslb(idx, 0);
+}
+
+static inline void slb_clear_invalidate_partition(void)
+{
+ clear_slb_entry(0);
+ asm volatile(PPC_SLBIA(6));
+}
+
+/*
+ * Malicious or buggy radix guests may have inserted SLB entries
+ * (only 0..3 because radix always runs with UPRT=1), so these must
+ * be cleared here to avoid side-channels. slbmte is used rather
+ * than slbia, as it won't clear cached translations.
+ */
+static void radix_clear_slb(void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ clear_slb_entry(i);
+}
+
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u32 lpid;
+
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+
+ /*
+ * All the isync()s are overkill but trivially follow the ISA
+ * requirements. Some can likely be replaced with justification
+ * comment for why they are not needed.
+ */
+ isync();
+ mtspr(SPRN_LPID, lpid);
+ isync();
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+ mtspr(SPRN_PID, vcpu->arch.pid);
+ isync();
+}
+
+static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ u32 lpid;
+ int i;
+
+ lpid = kvm->arch.lpid;
+
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, vcpu->arch.pid);
+
+ for (i = 0; i < vcpu->arch.slb_max; i++)
+ mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
+
+ isync();
+}
+
+static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
+{
+ isync();
+ mtspr(SPRN_PID, pid);
+ isync();
+ mtspr(SPRN_LPID, kvm->arch.host_lpid);
+ isync();
+ mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+ isync();
+
+ if (!radix_enabled())
+ slb_restore_bolted_realmode();
+}
+
+static void save_clear_host_mmu(struct kvm *kvm)
+{
+ if (!radix_enabled()) {
+ /*
+ * Hash host could save and restore host SLB entries to
+ * reduce SLB fault overheads of VM exits, but for now the
+ * existing code clears all entries and restores just the
+ * bolted ones when switching back to host.
+ */
+ slb_clear_invalidate_partition();
+ }
+}
+
+static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+ if (kvm_is_radix(kvm)) {
+ radix_clear_slb();
+ } else {
+ int i;
+ int nr = 0;
+
+ /*
+ * This must run before switching to host (radix host can't
+ * access all SLBs).
+ */
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ u64 slbee, slbev;
+ mfslb(i, &slbee, &slbev);
+ if (slbee & SLB_ESID_V) {
+ vcpu->arch.slb[nr].orige = slbee | i;
+ vcpu->arch.slb[nr].origv = slbev;
+ nr++;
+ }
+ }
+ vcpu->arch.slb_max = nr;
+ slb_clear_invalidate_partition();
+ }
+}
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ s64 hdec;
+ u64 tb, purr, spurr;
+ u64 *exsave;
+ bool ri_set;
+ int trap;
+ unsigned long msr;
+ unsigned long host_hfscr;
+ unsigned long host_ciabr;
+ unsigned long host_dawr0;
+ unsigned long host_dawrx0;
+ unsigned long host_psscr;
+ unsigned long host_pidr;
+ unsigned long host_dawr1;
+ unsigned long host_dawrx1;
+
+ hdec = time_limit - mftb();
+ if (hdec < 0)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+
+ WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+ WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+ start_timing(vcpu, &vcpu->arch.rm_entry);
+
+ vcpu->arch.ceded = 0;
+
+ if (vc->tb_offset) {
+ u64 new_tb = mftb() + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ msr = mfmsr();
+
+ host_hfscr = mfspr(SPRN_HFSCR);
+ host_ciabr = mfspr(SPRN_CIABR);
+ host_dawr0 = mfspr(SPRN_DAWR0);
+ host_dawrx0 = mfspr(SPRN_DAWRX0);
+ host_psscr = mfspr(SPRN_PSSCR);
+ host_pidr = mfspr(SPRN_PID);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }
+
+ if (vc->pcr)
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+ mtspr(SPRN_DPDES, vc->dpdes);
+ mtspr(SPRN_VTB, vc->vtb);
+
+ local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+ local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, vcpu->arch.purr);
+ mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+ if (dawr_enabled()) {
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+ mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+ }
+ }
+ mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+ mtspr(SPRN_IC, vcpu->arch.ic);
+
+ mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+ mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+ /*
+ * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
+ * Interrupt (HDSI) the HDSISR is not be updated at all.
+ *
+ * To work around this we put a canary value into the HDSISR before
+ * returning to a guest and then check for this canary when we take a
+ * HDSI. If we find the canary on a HDSI, we know the hardware didn't
+ * update the HDSISR. In this case we return to the guest to retake the
+ * HDSI which should correctly update the HDSISR the second time HDSI
+ * entry.
+ *
+ * Just do this on all p9 processors for now.
+ */
+ mtspr(SPRN_HDSISR, HDSISR_CANARY);
+
+ mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+ mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+ mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+ mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+ mtspr(SPRN_AMOR, ~0UL);
+
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
+
+ /*
+ * Hash host, hash guest, or radix guest with prefetch bug, all have
+ * to disable the MMU before switching to guest MMU state.
+ */
+ if (!radix_enabled() || !kvm_is_radix(kvm) ||
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
+ save_clear_host_mmu(kvm);
+
+ if (kvm_is_radix(kvm)) {
+ switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+ if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(0, 1); /* clear RI */
+
+ } else {
+ switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
+ }
+
+ /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
+ kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+
+ /*
+ * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+ * so set guest LPCR (with HDICE) before writing HDEC.
+ */
+ mtspr(SPRN_HDEC, hdec);
+
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+ mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+ accumulate_time(vcpu, &vcpu->arch.guest_time);
+
+ kvmppc_p9_enter_guest(vcpu);
+
+ accumulate_time(vcpu, &vcpu->arch.rm_intr);
+
+ /* XXX: Could get these from r11/12 and paca exsave instead */
+ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+ vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+ /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
+ trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+
+ /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
+ ri_set = false;
+ if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ if (trap != BOOK3S_INTERRUPT_SYSCALL &&
+ (vcpu->arch.shregs.msr & MSR_RI))
+ ri_set = true;
+ exsave = local_paca->exgen;
+ } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
+ exsave = local_paca->exnmi;
+ } else { /* trap == 0x200 */
+ exsave = local_paca->exmc;
+ }
+
+ vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+ vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+
+ /*
+ * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
+ * and hstate scratch (which we need to move into exsave to make
+ * re-entrant vs SRESET/MCE)
+ */
+ if (ri_set) {
+ if (unlikely(!(mfmsr() & MSR_RI))) {
+ __mtmsrd(MSR_RI, 1);
+ WARN_ON_ONCE(1);
+ }
+ } else {
+ WARN_ON_ONCE(mfmsr() & MSR_RI);
+ __mtmsrd(MSR_RI, 1);
+ }
+
+ vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+ vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+ vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+ vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+ vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+ vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+ vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+ vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+ vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+ if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ kvmppc_realmode_machine_check(vcpu);
+
+ } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
+ kvmppc_realmode_hmi_handler();
+
+ } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2. This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+ } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ /*
+ * The cases we want to handle here are those where the guest
+ * is in real suspend mode and is trying to transition to
+ * transactional mode.
+ */
+ if (local_paca->kvm_hstate.fake_suspend &&
+ (vcpu->arch.shregs.msr & MSR_TS_S)) {
+ if (kvmhv_p9_tm_emulation_early(vcpu)) {
+ /* Prevent it being handled again. */
+ trap = 0;
+ }
+ }
+#endif
+ }
+
+ accumulate_time(vcpu, &vcpu->arch.rm_exit);
+
+ /* Advance host PURR/SPURR by the amount used by guest */
+ purr = mfspr(SPRN_PURR);
+ spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+ purr - vcpu->arch.purr);
+ mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+ spurr - vcpu->arch.spurr);
+ vcpu->arch.purr = purr;
+ vcpu->arch.spurr = spurr;
+
+ vcpu->arch.ic = mfspr(SPRN_IC);
+ vcpu->arch.pid = mfspr(SPRN_PID);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+ vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+ vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+ vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+ vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ mtspr(SPRN_HFSCR, host_hfscr);
+ mtspr(SPRN_CIABR, host_ciabr);
+ mtspr(SPRN_DAWR0, host_dawr0);
+ mtspr(SPRN_DAWRX0, host_dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, host_dawr1);
+ mtspr(SPRN_DAWRX1, host_dawrx1);
+ }
+
+ if (kvm_is_radix(kvm)) {
+ /*
+ * Since this is radix, do a eieio; tlbsync; ptesync sequence
+ * in case we interrupted the guest between a tlbie and a
+ * ptesync.
+ */
+ asm volatile("eieio; tlbsync; ptesync");
+ }
+
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
+ vc->dpdes = mfspr(SPRN_DPDES);
+ vc->vtb = mfspr(SPRN_VTB);
+ mtspr(SPRN_DPDES, 0);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, PCR_MASK);
+
+ if (vc->tb_offset_applied) {
+ u64 new_tb = mftb() - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = 0;
+ }
+
+ mtspr(SPRN_HDEC, 0x7fffffff);
+
+ save_clear_guest_mmu(kvm, vcpu);
+ switch_mmu_to_host(kvm, host_pidr);
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
+
+ /*
+ * If we are in real mode, only switch MMU on after the MMU is
+ * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
+ */
+ __mtmsrd(msr, 0);
+
+ end_timing(vcpu);
+
+ return trap;
+}
+EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7a0f12404e0e..7b23fe6e9ca0 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -46,6 +46,10 @@ static int global_invalidates(struct kvm *kvm)
else
global = 1;
+ /* LPID has been switched to host if in virt mode so can't do local */
+ if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+ global = 1;
+
if (!global) {
/* any other core might now have stale TLB entries... */
smp_wmb();
@@ -398,6 +402,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
vcpu->arch.pgdir, true,
&vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -542,6 +547,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
&vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_remove);
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
@@ -660,6 +666,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn)
@@ -730,6 +737,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_protect);
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -770,6 +778,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
}
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_read);
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -818,6 +827,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -865,6 +875,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
unsigned long gpa, int writing, unsigned long *hpa,
@@ -1283,3 +1294,4 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
return -1; /* send fault up to host kernel mode */
}
+EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index c2c9c733f359..0a11ec88a0ae 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -141,13 +141,6 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
- if (xive_enabled() && kvmhv_on_pseries()) {
- /* No XICS access or hypercalls available, too hard */
- this_icp->rm_action |= XICS_RM_KICK_VCPU;
- this_icp->rm_kick_target = vcpu;
- return;
- }
-
/*
* Check if the core is loaded,
* if not, find an available host core to post to wake the VCPU,
@@ -771,14 +764,6 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys;
int64_t rc;
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- iosync();
- plpar_hcall_raw(H_EOI, retbuf, hwirq);
- return;
- }
-
rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 004f0d4e665f..8dd437d7a2c6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -25,18 +25,10 @@
#include <asm/export.h>
#include <asm/tm.h>
#include <asm/opal.h>
-#include <asm/xive-regs.h>
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/cpuidle.h>
-#include <asm/ultravisor-api.h>
-
-/* Sign-extend HDEC if not on POWER9 */
-#define EXTEND_HDEC(reg) \
-BEGIN_FTR_SECTION; \
- extsw reg, reg; \
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
@@ -44,9 +36,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
-#define SFS 208
+#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
-#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
@@ -57,11 +48,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_HFSCR (SFS-72)
#define STACK_SLOT_AMR (SFS-80)
#define STACK_SLOT_UAMOR (SFS-88)
-#define STACK_SLOT_DAWR1 (SFS-96)
-#define STACK_SLOT_DAWRX1 (SFS-104)
-#define STACK_SLOT_FSCR (SFS-112)
-/* the following is used by the P9 short path */
-#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
+#define STACK_SLOT_FSCR (SFS-96)
/*
* Call kvmppc_hv_entry in real mode.
@@ -137,15 +124,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Return the trap number on this thread as the return value */
mr r3, r12
- /*
- * If we came back from the guest via a relocation-on interrupt,
- * we will be in virtual mode at this point, which makes it a
- * little easier to get back to the caller.
- */
- mfmsr r0
- andi. r0, r0, MSR_IR /* in real mode? */
- bne .Lvirt_return
-
/* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
@@ -155,11 +133,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtsrr1 r7
RFI_TO_KERNEL
- /* Virtual-mode return */
-.Lvirt_return:
- mtlr r8
- blr
-
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -246,7 +219,7 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
- EXTEND_HDEC(r0)
+ extsw r0, r0
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
cmpdi r0, 0
blt kvm_novcpu_exit
@@ -348,10 +321,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- LOAD_REG_ADDR(r6, decrementer_max)
- ld r6, 0(r6)
+ lis r6,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC, r6
-BEGIN_FTR_SECTION
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
@@ -363,7 +334,6 @@ BEGIN_FTR_SECTION
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
lwsync
@@ -434,7 +404,6 @@ kvm_no_guest:
blr
53:
-BEGIN_FTR_SECTION
HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r5, 0
@@ -449,14 +418,6 @@ BEGIN_FTR_SECTION
b kvm_unsplit_nap
60: HMT_MEDIUM
b kvm_secondary_got_guest
-FTR_SECTION_ELSE
- HMT_LOW
- ld r5, HSTATE_KVM_VCORE(r13)
- cmpdi r5, 0
- beq kvm_no_guest
- HMT_MEDIUM
- b kvm_secondary_got_guest
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13)
@@ -582,13 +543,11 @@ kvmppc_hv_entry:
bne 10f
lwz r7,KVM_LPID(r9)
-BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -669,16 +628,6 @@ kvmppc_got_guest:
/* Save host values of some registers */
BEGIN_FTR_SECTION
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- mfspr r7, SPRN_PID
- std r5, STACK_SLOT_TID(r1)
- std r6, STACK_SLOT_PSSCR(r1)
- std r7, STACK_SLOT_PID(r1)
- mfspr r5, SPRN_HFSCR
- std r5, STACK_SLOT_HFSCR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR0
mfspr r7, SPRN_DAWRX0
@@ -690,12 +639,6 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_FSCR
std r5, STACK_SLOT_FSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r6, SPRN_DAWR1
- mfspr r7, SPRN_DAWRX1
- std r6, STACK_SLOT_DAWR1(r1)
- std r7, STACK_SLOT_DAWRX1(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
mfspr r5, SPRN_AMR
std r5, STACK_SLOT_AMR(r1)
@@ -713,13 +656,9 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -786,12 +725,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX0(r4)
mtspr SPRN_DAWR0, r5
mtspr SPRN_DAWRX0, r6
-BEGIN_FTR_SECTION
- ld r5, VCPU_DAWR1(r4)
- ld r6, VCPU_DAWRX1(r4)
- mtspr SPRN_DAWR1, r5
- mtspr SPRN_DAWRX1, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
@@ -809,7 +742,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
mtspr SPRN_BESCR, r6
mtspr SPRN_PID, r7
mtspr SPRN_WORT, r8
-BEGIN_FTR_SECTION
/* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
@@ -820,18 +752,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_CSIGR, r7
mtspr SPRN_TACR, r8
nop
-FTR_SECTION_ELSE
- /* POWER9-only registers */
- ld r5, VCPU_TID(r4)
- ld r6, VCPU_PSSCR(r4)
- lbz r8, HSTATE_FAKE_SUSPEND(r13)
- oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
- rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
- ld r7, VCPU_HFSCR(r4)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
ld r5, VCPU_SPRG0(r4)
@@ -901,23 +821,15 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- EXTEND_HDEC(r3)
+ extsw r3, r3
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
- ld r6, VCPU_KVM(r4)
- lbz r0, KVM_RADIX(r6)
- cmpwi r0, 0
- bne 9f
-
- /* For hash guest, clear out and reload the SLB */
-BEGIN_MMU_FTR_SECTION
- /* Radix host won't have populated the SLB, so no need to clear */
+ /* Clear out and reload the SLB */
li r6, 0
slbmte r6, r6
PPC_SLBIA(6)
ptesync
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
lwz r5,VCPU_SLB_MAX(r4)
@@ -932,96 +844,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
bdnz 1b
9:
-#ifdef CONFIG_KVM_XICS
- /* We are entering the guest on that thread, push VCPU to XIVE */
- ld r11, VCPU_XIVE_SAVED_STATE(r4)
- li r9, TM_QW1_OS
- lwz r8, VCPU_XIVE_CAM_WORD(r4)
- cmpwi r8, 0
- beq no_xive
- li r7, TM_QW1_OS + TM_WORD2
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdx r11,r9,r10
- stwx r8,r7,r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdcix r11,r9,r10
- stwcix r8,r7,r10
-3: li r9, 1
- stb r9, VCPU_XIVE_PUSHED(r4)
- eieio
-
- /*
- * We clear the irq_pending flag. There is a small chance of a
- * race vs. the escalation interrupt happening on another
- * processor setting it again, but the only consequence is to
- * cause a spurrious wakeup on the next H_CEDE which is not an
- * issue.
- */
- li r0,0
- stb r0, VCPU_IRQ_PENDING(r4)
-
- /*
- * In single escalation mode, if the escalation interrupt is
- * on, we mask it.
- */
- lbz r0, VCPU_XIVE_ESC_ON(r4)
- cmpwi cr1, r0,0
- beq cr1, 1f
- li r9, XIVE_ESB_SET_PQ_01
- beq 4f /* in real mode? */
- ld r10, VCPU_XIVE_ESC_VADDR(r4)
- ldx r0, r10, r9
- b 5f
-4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
- ldcix r0, r10, r9
-5: sync
-
- /* We have a possible subtle race here: The escalation interrupt might
- * have fired and be on its way to the host queue while we mask it,
- * and if we unmask it early enough (re-cede right away), there is
- * a theorical possibility that it fires again, thus landing in the
- * target queue more than once which is a big no-no.
- *
- * Fortunately, solving this is rather easy. If the above load setting
- * PQ to 01 returns a previous value where P is set, then we know the
- * escalation interrupt is somewhere on its way to the host. In that
- * case we simply don't clear the xive_esc_on flag below. It will be
- * eventually cleared by the handler for the escalation interrupt.
- *
- * Then, when doing a cede, we check that flag again before re-enabling
- * the escalation interrupt, and if set, we abort the cede.
- */
- andi. r0, r0, XIVE_ESB_VAL_P
- bne- 1f
-
- /* Now P is 0, we can clear the flag */
- li r0, 0
- stb r0, VCPU_XIVE_ESC_ON(r4)
-1:
-no_xive:
-#endif /* CONFIG_KVM_XICS */
-
- li r0, 0
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4)
-BEGIN_FTR_SECTION
- /* On POWER9, also check for emulated doorbell interrupt */
- lbz r3, VCPU_DBELL_REQ(r4)
- or r0, r0, r3
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpdi r0, 0
beq 71f
mr r3, r4
@@ -1033,7 +858,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
-fast_guest_entry_c:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
@@ -1095,18 +919,8 @@ BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-/* Move canary into DSISR to check for later */
-BEGIN_FTR_SECTION
- li r0, 0x7fff
- mtspr SPRN_HDSISR, r0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
- ld r6, VCPU_KVM(r4)
- lbz r7, KVM_SECURE_GUEST(r6)
- cmpdi r7, 0
ld r6, VCPU_GPR(R6)(r4)
ld r7, VCPU_GPR(R7)(r4)
- bne ret_to_ultra
ld r0, VCPU_CR(r4)
mtcr r0
@@ -1117,117 +931,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b .
-/*
- * Use UV_RETURN ultracall to return control back to the Ultravisor after
- * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
- * to the Hypervisor.
- *
- * All registers have already been loaded, except:
- * R0 = hcall result
- * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
- * R3 = UV_RETURN
- */
-ret_to_ultra:
- ld r0, VCPU_CR(r4)
- mtcr r0
-
- ld r0, VCPU_GPR(R3)(r4)
- mfspr r2, SPRN_SRR1
- li r3, 0
- ori r3, r3, UV_RETURN
- ld r4, VCPU_GPR(R4)(r4)
- sc 2
-
-/*
- * Enter the guest on a P9 or later system where we have exactly
- * one vcpu per vcore and we don't need to go to real mode
- * (which implies that host and guest are both using radix MMU mode).
- * r3 = vcpu pointer
- * Most SPRs and all the VSRs have been loaded already.
- */
-_GLOBAL(__kvmhv_vcpu_entry_p9)
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
- mflr r0
- std r0, PPC_LR_STKOFF(r1)
- stdu r1, -SFS(r1)
-
- li r0, 1
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
- std r3, HSTATE_KVM_VCPU(r13)
- mfcr r4
- stw r4, SFS+8(r1)
-
- std r1, HSTATE_HOST_R1(r13)
-
- reg = 14
- .rept 18
- std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, __VCPU_GPR(reg)(r3)
- reg = reg + 1
- .endr
-
- mfmsr r10
- std r10, HSTATE_HOST_MSR(r13)
-
- mr r4, r3
- b fast_guest_entry_c
-guest_exit_short_path:
- /*
- * Malicious or buggy radix guests may have inserted SLB entries
- * (only 0..3 because radix always runs with UPRT=1), so these must
- * be cleared here to avoid side-channels. slbmte is used rather
- * than slbia, as it won't clear cached translations.
- */
- li r0,0
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
-
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- reg = 14
- .rept 18
- std reg, __VCPU_GPR(reg)(r9)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- lwz r4, SFS+8(r1)
- mtcr r4
-
- mr r3, r12 /* trap number */
-
- addi r1, r1, SFS
- ld r0, PPC_LR_STKOFF(r1)
- mtlr r0
-
- /* If we are in real mode, do a rfid to get back to the caller */
- mfmsr r4
- andi. r5, r4, MSR_IR
- bnelr
- rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
- mtspr SPRN_SRR0, r0
- ld r10, HSTATE_HOST_MSR(r13)
- rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
- mtspr SPRN_SRR1, r10
- RFI_TO_KERNEL
- b .
secondary_too_late:
li r12, 0
@@ -1268,21 +971,16 @@ hdec_soon:
kvmppc_interrupt_hv:
/*
* Register contents:
+ * R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | interrupt vector
* R13 = PACA
* guest R12 saved in shadow VCPU SCRATCH0
* guest R13 saved in SPRN_SCRATCH0
+ * guest R9 saved in HSTATE_SCRATCH2
*/
- std r9, HSTATE_SCRATCH2(r13)
- lbz r9, HSTATE_IN_GUEST(r13)
- cmpwi r9, KVM_GUEST_MODE_HOST_HV
- beq kvmppc_bad_host_intr
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
- cmpwi r9, KVM_GUEST_MODE_GUEST
- ld r9, HSTATE_SCRATCH2(r13)
- beq kvmppc_interrupt_pr
-#endif
/* We're now back in the host but in guest MMU context */
+ cmpwi r9,KVM_GUEST_MODE_HOST_HV
+ beq kvmppc_bad_host_intr
li r9, KVM_GUEST_MODE_HOST_HV
stb r9, HSTATE_IN_GUEST(r13)
@@ -1400,7 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
mfspr r3,SPRN_HDEC
- EXTEND_HDEC(r3)
+ extsw r3, r3
cmpdi r3,0
mr r4,r9
bge fast_guest_return
@@ -1412,14 +1110,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
- /* always exit if we're running a nested guest */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
beq maybe_reenter_guest
@@ -1449,62 +1139,16 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
mr r4, r9
bl kvmhv_accumulate_time
#endif
-#ifdef CONFIG_KVM_XICS
- /* We are exiting, pull the VP from the XIVE */
- lbz r0, VCPU_XIVE_PUSHED(r9)
- cmpwi cr0, r0, 0
- beq 1f
- li r7, TM_SPC_PULL_OS_CTX
- li r6, TM_QW1_OS
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzx r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldx r11, r6, r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzcix r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldcix r11, r6, r10
-3: std r11, VCPU_XIVE_SAVED_STATE(r9)
- /* Fixup some of the state for the next load */
- li r10, 0
- li r0, 0xff
- stb r10, VCPU_XIVE_PUSHED(r9)
- stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
- stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
- eieio
-1:
-#endif /* CONFIG_KVM_XICS */
/*
* Possibly flush the link stack here, before we do a blr in
- * guest_exit_short_path.
+ * kvmhv_switch_to_host.
*/
1: nop
patch_site 1b patch__call_kvm_flush_link_stack
- /* If we came in through the P9 short path, go back out to C now */
- lwz r0, STACK_SLOT_SHORT_PATH(r1)
- cmpwi r0, 0
- bne guest_exit_short_path
-
/* For hash guest, read the guest SLB and save it away */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
li r5, 0
- cmpwi r0, 0
- bne 0f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
@@ -1528,9 +1172,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
stw r5,VCPU_SLB_MAX(r9)
/* load host SLB entries */
-BEGIN_MMU_FTR_SECTION
- b guest_bypass
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
@@ -1543,21 +1184,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
slbmte r6,r5
1: addi r8,r8,16
.endr
- b guest_bypass
-
-0: /*
- * Sanitise radix guest SLB, see guest_exit_short_path comment.
- * We clear vcpu->arch.slb_max to match earlier behaviour.
- */
- li r0,0
- stw r0,VCPU_SLB_MAX(r9)
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
guest_bypass:
stw r12, STACK_SLOT_TRAP(r1)
@@ -1567,12 +1193,6 @@ guest_bypass:
ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
- /* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
- ld r4, VCORE_LPCR(r3)
- andis. r4, r4, LPCR_LD@h
- bne 16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
@@ -1646,7 +1266,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_BESCR(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
mfspr r7, SPRN_CSIGR
@@ -1655,17 +1274,6 @@ BEGIN_FTR_SECTION
std r6, VCPU_ACOP(r9)
std r7, VCPU_CSIGR(r9)
std r8, VCPU_TACR(r9)
-FTR_SECTION_ELSE
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- std r5, VCPU_TID(r9)
- rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
- rotldi r6, r6, 60
- std r6, VCPU_PSSCR(r9)
- /* Restore host HFSCR value */
- ld r7, STACK_SLOT_HFSCR(r1)
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_FSCR(r1)
mtspr SPRN_FSCR, r5
@@ -1677,13 +1285,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r0, 0
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
-BEGIN_FTR_SECTION
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
ld r8, STACK_SLOT_IAMR(r1)
@@ -1740,13 +1346,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -1792,80 +1394,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWR0, r6
mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- ld r6, STACK_SLOT_DAWR1(r1)
- ld r7, STACK_SLOT_DAWRX1(r1)
- mtspr SPRN_DAWR1, r6
- mtspr SPRN_DAWRX1, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
-BEGIN_FTR_SECTION
- ld r5, STACK_SLOT_TID(r1)
- ld r6, STACK_SLOT_PSSCR(r1)
- ld r7, STACK_SLOT_PID(r1)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_PID, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
-#ifdef CONFIG_PPC_RADIX_MMU
- /*
- * Are we running hash or radix ?
- */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
- cmpwi cr2, r0, 0
- beq cr2, 2f
-
- /*
- * Radix: do eieio; tlbsync; ptesync sequence in case we
- * interrupted the guest between a tlbie and a ptesync.
- */
- eieio
- tlbsync
- ptesync
-
-BEGIN_FTR_SECTION
- /* Radix: Handle the case where the guest used an illegal PID */
- LOAD_REG_ADDR(r4, mmu_base_pid)
- lwz r3, VCPU_GUEST_PID(r9)
- lwz r5, 0(r4)
- cmpw cr0,r3,r5
- blt 2f
-
- /*
- * Illegal PID, the HW might have prefetched and cached in the TLB
- * some translations for the LPID 0 / guest PID combination which
- * Linux doesn't know about, so we need to flush that PID out of
- * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
- * the right context.
- */
- li r0,0
- mtspr SPRN_LPID,r0
- isync
-
- /* Then do a congruence class local flush */
- ld r6,VCPU_KVM(r9)
- lwz r0,KVM_TLB_SETS(r6)
- mtctr r0
- li r7,0x400 /* IS field = 0b01 */
- ptesync
- sldi r0,r3,32 /* RS has PID */
-1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
- addi r7,r7,0x1000
- bdnz 1b
- ptesync
-END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
-
-2:
-#endif /* CONFIG_PPC_RADIX_MMU */
-
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
-BEGIN_FTR_SECTION
- PPC_CP_ABORT
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
/*
* POWER7/POWER8 guest -> host partition switch code.
@@ -1902,13 +1430,11 @@ kvmhv_switch_to_host:
/* Primary thread switches back to host partition */
lwz r7,KVM_HOST_LPID(r4)
-BEGIN_FTR_SECTION
ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
mtspr SPRN_SDR1,r6 /* switch to host page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -2117,26 +1643,13 @@ kvmppc_tm_emul:
* reflect the HDSI to the guest as a DSI.
*/
kvmppc_hdsi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
-BEGIN_FTR_SECTION
- /* Look for DSISR canary. If we find it, retry instruction */
- cmpdi r6, 0x7fff
- beq 6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- cmpwi r0, 0
- bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
@@ -2204,31 +1717,15 @@ fast_interrupt_c_return:
stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont
-.Lradix_hdsi:
- std r4, VCPU_FAULT_DAR(r9)
- stw r6, VCPU_FAULT_DSISR(r9)
-.Lradix_hisi:
- mfspr r5, SPRN_ASDR
- std r5, VCPU_FAULT_GPA(r9)
- b guest_exit_cont
-
/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
* it is an HPTE not found fault for a page that we have paged out.
*/
kvmppc_hisi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
- cmpwi r0, 0
- bne .Lradix_hisi /* for radix, just save ASDR */
andis. r0, r11, SRR1_ISI_NOPT@h
beq 1f
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_INST_SEGMENT
@@ -2276,10 +1773,6 @@ hcall_try_real_mode:
andi. r0,r11,MSR_PR
/* sc 1 from userspace - reflect to guest syscall */
bne sc_1_fast_return
- /* sc 1 from nested guest - give it to L1 to handle */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
@@ -2544,7 +2037,7 @@ hcall_real_table:
#else
.long 0 /* 0x2fc - H_XIRR_X*/
#endif
- .long DOTSYM(kvmppc_h_random) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_random) - hcall_real_table
.globl hcall_real_table_end
hcall_real_table_end:
@@ -2675,13 +2168,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2701,15 +2190,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
-BEGIN_FTR_SECTION
- /* On P9 check whether the guest has large decrementer mode enabled */
- ld r6, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_LPCR(r6)
- andis. r6, r6, LPCR_LD@h
- bne 68f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
-68: EXTEND_HDEC(r4)
+ extsw r4, r4
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
@@ -2754,28 +2236,11 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvm_nap_sequence: /* desired LPCR value in r5 */
-BEGIN_FTR_SECTION
- /*
- * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
- * enable state loss = 1 (allow SMT mode switch)
- * requested level = 0 (just stop dispatching)
- */
- lis r3, (PSSCR_EC | PSSCR_ESL)@h
- /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
- li r4, LPCR_PECE_HVEE@higher
- sldi r4, r4, 32
- or r5, r5, r4
-FTR_SECTION_ELSE
li r3, PNV_THREAD_NAP
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
-BEGIN_FTR_SECTION
- bl isa300_idle_stop_mayloss
-FTR_SECTION_ELSE
bl isa206_idle_insn_mayloss
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
@@ -2794,10 +2259,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
beq kvm_end_cede
cmpwi r0, NAPPING_NOVCPU
beq kvm_novcpu_wakeup
-BEGIN_FTR_SECTION
cmpwi r0, NAPPING_UNSPLIT
beq kvm_unsplit_wakeup
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
@@ -2817,13 +2280,9 @@ kvm_end_cede:
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2913,47 +2372,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
-#ifdef CONFIG_KVM_XICS
- /* are we using XIVE with single escalation? */
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- li r6, XIVE_ESB_SET_PQ_00
- /*
- * If we still have a pending escalation, abort the cede,
- * and we must set PQ to 10 rather than 00 so that we don't
- * potentially end up with two entries for the escalation
- * interrupt in the XIVE interrupt queue. In that case
- * we also don't want to set xive_esc_on to 1 here in
- * case we race with xive_esc_irq().
- */
- lbz r5, VCPU_XIVE_ESC_ON(r9)
- cmpwi r5, 0
- beq 4f
- li r0, 0
- stb r0, VCPU_CEDED(r9)
- /*
- * The escalation interrupts are special as we don't EOI them.
- * There is no need to use the load-after-store ordering offset
- * to set PQ to 10 as we won't use StoreEOI.
- */
- li r6, XIVE_ESB_SET_PQ_10
- b 5f
-4: li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
- /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
- sync
-5: /* Enable XIVE escalation */
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 1f
- ldx r0, r10, r6
- b 2f
-1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- ldcix r0, r10, r6
-2: sync
-#endif /* CONFIG_KVM_XICS */
-3: b guest_exit_cont
+ b guest_exit_cont
/* Try to do machine check recovery in real mode */
machine_check_realmode:
@@ -3030,10 +2449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
PPC_MSGCLR(6)
/* see if it's a host IPI */
li r3, 1
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bnelr
@@ -3342,73 +2757,12 @@ kvmppc_bad_host_intr:
std r3, STACK_FRAME_OVERHEAD-16(r1)
/*
- * On POWER9 do a minimal restore of the MMU and call C code,
- * which will print a message and panic.
* XXX On POWER7 and POWER8, we just spin here since we don't
* know what the other threads are doing (and we don't want to
* coordinate with them) - but at least we now have register state
* in memory that we might be able to look at from another CPU.
*/
-BEGIN_FTR_SECTION
b .
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
- ld r9, HSTATE_KVM_VCPU(r13)
- ld r10, VCPU_KVM(r9)
-
- li r0, 0
- mtspr SPRN_AMR, r0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX0, r0
-BEGIN_FTR_SECTION
- mtspr SPRN_DAWRX1, r0
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
-
- /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
- slbmte r0, r0
- PPC_SLBIA(6)
-
-BEGIN_MMU_FTR_SECTION
- b 4f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
-
- ptesync
- ld r8, PACA_SLBSHADOWPTR(r13)
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7, r5, SLB_ESID_V@h
- beq 3f
- slbmte r6, r5
-3: addi r8, r8, 16
- .endr
-
-4: lwz r7, KVM_HOST_LPID(r10)
- mtspr SPRN_LPID, r7
- mtspr SPRN_PID, r0
- ld r8, KVM_HOST_LPCR(r10)
- mtspr SPRN_LPCR, r8
- isync
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- /*
- * Turn on the MMU and jump to C code
- */
- bcl 20, 31, .+4
-5: mflr r3
- addi r3, r3, 9f - 5b
- li r4, -1
- rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */
- ld r4, PACAKMSR(r13)
- mtspr SPRN_SRR0, r3
- mtspr SPRN_SRR1, r4
- RFI_TO_KERNEL
-9: addi r3, r1, STACK_FRAME_OVERHEAD
- bl kvmppc_bad_interrupt
- b 9b
/*
* This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1f492aa4c8d6..202046a83fc1 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -164,12 +164,15 @@ kvmppc_interrupt_pr:
/* 64-bit entry. Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
+ * R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | exit handler id
* R13 = PACA
* HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH2 = guest R9
*/
#ifdef CONFIG_PPC64
/* Match 32-bit entry */
+ ld r9,HSTATE_SCRATCH2(r13)
rotldi r12, r12, 32 /* Flip R12 halves for stw */
stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
srdi r12, r12, 32 /* shift trap into low half */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e7219b6f5f9a..9268d386b128 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -128,6 +128,71 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+ if (!vcpu->arch.xive_pushed)
+ return;
+
+ /*
+ * Should not have been pushed if there is no tima
+ */
+ if (WARN_ON(!tima))
+ return;
+
+ eieio();
+ /* First load to pull the context, we ignore the value */
+ __raw_readl(tima + TM_SPC_PULL_OS_CTX);
+ /* Second load to recover the context state (Words 0 and 1) */
+ vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+ /* Fixup some of the state for the next load */
+ vcpu->arch.xive_saved_state.lsmfb = 0;
+ vcpu->arch.xive_saved_state.ack = 0xff;
+ vcpu->arch.xive_pushed = 0;
+ eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
+void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+ void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+ if (!esc_vaddr)
+ return;
+
+ /* we are using XIVE with single escalation */
+
+ if (vcpu->arch.xive_esc_on) {
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
+ vcpu->arch.ceded = 0;
+ /*
+ * The escalation interrupts are special as we don't EOI them.
+ * There is no need to use the load-after-store ordering offset
+ * to set PQ to 10 as we won't use StoreEOI.
+ */
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+ } else {
+ vcpu->arch.xive_esc_on = true;
+ mb();
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+ }
+ mb();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
+/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
*/
@@ -2075,6 +2140,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ /* The VM should have configured XICS mode before doing XICS hcalls. */
+ if (!kvmppc_xics_enabled(vcpu))
+ return H_TOO_HARD;
+
+ switch (req) {
+ case H_XIRR:
+ return xive_vm_h_xirr(vcpu);
+ case H_CPPR:
+ return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_EOI:
+ return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_IPI:
+ return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ case H_IPOLL:
+ return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_XIRR_X:
+ xive_vm_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
+ return H_SUCCESS;
+ }
+
+ return H_UNSUPPORTED;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -2257,21 +2352,3 @@ struct kvm_device_ops kvm_xive_ops = {
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
};
-
-void kvmppc_xive_init_module(void)
-{
- __xive_vm_h_xirr = xive_vm_h_xirr;
- __xive_vm_h_ipoll = xive_vm_h_ipoll;
- __xive_vm_h_ipi = xive_vm_h_ipi;
- __xive_vm_h_cppr = xive_vm_h_cppr;
- __xive_vm_h_eoi = xive_vm_h_eoi;
-}
-
-void kvmppc_xive_exit_module(void)
-{
- __xive_vm_h_xirr = NULL;
- __xive_vm_h_ipoll = NULL;
- __xive_vm_h_ipi = NULL;
- __xive_vm_h_cppr = NULL;
- __xive_vm_h_eoi = NULL;
-}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 86c24a4ad809..afe9eeac6d56 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -289,13 +289,6 @@ extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
-extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
- unsigned long mfrr);
-extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-
/*
* Common Xive routines for XICS-over-XIVE and XIVE native
*/
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 76800c84f2a3..1253666dd4d8 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -1281,13 +1281,3 @@ struct kvm_device_ops kvm_xive_native_ops = {
.has_attr = kvmppc_xive_native_has_attr,
.mmap = kvmppc_xive_native_mmap,
};
-
-void kvmppc_xive_native_init_module(void)
-{
- ;
-}
-
-void kvmppc_xive_native_exit_module(void)
-{
- ;
-}
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b463..fe236c38ce00 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
}
/* Find out how many PID bits are supported */
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
- mmu_base_pid = 1;
- } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
- * When KVM is possible, we only use the top half of the
- * PID space to avoid collisions between host and guest PIDs
- * which can cause problems due to prefetch when exiting the
- * guest with AIL=3
+ * Older versions of KVM on these machines perfer if the
+ * guest only uses the low 19 PID bits.
*/
- mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
- mmu_base_pid = 1;
-#endif
- } else {
- /* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
- mmu_base_pid = 1;
+ } else {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
}
+ mmu_base_pid = 1;
/*
* Allocate Partition table and process table for the
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 817a02ef6032..b827e2a96da0 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1344,49 +1344,3 @@ void radix__flush_tlb_all(void)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
-{
- unsigned long pid = mm->context.id;
-
- if (unlikely(pid == MMU_NO_CONTEXT))
- return;
-
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- return;
-
- /*
- * If this context hasn't run on that CPU before and KVM is
- * around, there's a slim chance that the guest on another
- * CPU just brought in obsolete translation into the TLB of
- * this CPU due to a bad prefetch using the guest PID on
- * the way into the hypervisor.
- *
- * We work around this here. If KVM is possible, we check if
- * any sibling thread is in KVM. If it is, the window may exist
- * and thus we flush that PID from the core.
- *
- * A potential future improvement would be to mark which PIDs
- * have never been used on the system and avoid it if the PID
- * is new and the process has no other cpumask bit set.
- */
- if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
- int cpu = smp_processor_id();
- int sib = cpu_first_thread_sibling(cpu);
- bool flush = false;
-
- for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
- if (sib == cpu)
- continue;
- if (!cpu_possible(sib))
- continue;
- if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
- flush = true;
- }
- if (flush)
- _tlbiel_pid(pid, RIC_FLUSH_ALL);
- }
-}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index a857af401738..74246536b832 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
- if (new_on_cpu)
- radix_kvm_prefetch_workaround(next);
- else
+ if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
/*
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 999997d9e9a9..528a7e0cf83a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -604,7 +604,7 @@ struct p9_sprs {
u64 uamor;
};
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power9_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -803,8 +801,7 @@ core_woken:
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -895,7 +892,7 @@ struct p10_sprs {
*/
};
-static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power10_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -991,8 +986,7 @@ core_woken:
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
{
unsigned long srr1;
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- __ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-#else
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- *
- * kvm_start_guest must still be called in real mode though, hence
- * the false argument.
- */
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
- __ppc64_runlatch_off();
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, false);
- else
- srr1 = power9_idle_stop(psscr, false);
- __ppc64_runlatch_on();
-
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
- /* Order setting hwthread_state vs. testing hwthread_req */
- smp_mb();
- if (local_paca->kvm_hstate.hwthread_req)
- srr1 = idle_kvm_start_guest(srr1);
- mtmsr(MSR_KERNEL);
-#endif
+ srr1 = power9_idle_stop(psscr);
return srr1;
}
@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
+ srr1 = power9_idle_stop(psscr);
__ppc64_runlatch_on();
fini_irq_for_idle_irqsoff();