aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm/book3s_hv.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm/book3s_hv.c')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c817
1 files changed, 490 insertions, 327 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bc0813644666..1d1fcc290fca 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -76,6 +76,7 @@
#include <asm/kvm_book3s_uvmem.h>
#include <asm/ultravisor.h>
#include <asm/dtl.h>
+#include <asm/plpar_wrappers.h>
#include "book3s.h"
@@ -103,13 +104,9 @@ static int target_smt_mode;
module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
-static bool indep_threads_mode = true;
-module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
-
static bool one_vm_per_core;
module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
#ifdef CONFIG_KVM_XICS
static const struct kernel_param_ops module_param_ops = {
@@ -134,9 +131,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
return kvm->arch.nested_enable && kvm_is_radix(kvm);
}
-/* If set, the threads on each CPU core have to be in the same MMU mode */
-static bool no_mixing_hpt_and_radix __read_mostly;
-
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -236,7 +230,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
waitp = kvm_arch_vcpu_get_wait(vcpu);
if (rcuwait_wake_up(waitp))
- ++vcpu->stat.halt_wakeup;
+ ++vcpu->stat.generic.halt_wakeup;
cpu = READ_ONCE(vcpu->arch.thread_cpu);
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -807,7 +801,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
* KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
* Keep this in synch with kvmppc_filter_guest_lpcr_hv.
*/
- if (mflags != 0 && mflags != 3)
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
default:
@@ -899,6 +894,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* H_SUCCESS if the source vcore wasn't idle (e.g. if it may
* have useful work to do and should not confer) so we don't
* recheck that here.
+ *
+ * In the case of the P9 single vcpu per vcore case, the real
+ * mode handler is not called but no other threads are in the
+ * source vcore.
*/
spin_lock(&vcore->lock);
@@ -924,8 +923,71 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
return yield_count;
}
+/*
+ * H_RPT_INVALIDATE hcall handler for nested guests.
+ *
+ * Handles only nested process-scoped invalidation requests in L0.
+ */
+static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu)
+{
+ unsigned long type = kvmppc_get_gpr(vcpu, 6);
+ unsigned long pid, pg_sizes, start, end;
+
+ /*
+ * The partition-scoped invalidations aren't handled here in L0.
+ */
+ if (type & H_RPTI_TYPE_NESTED)
+ return RESUME_HOST;
+
+ pid = kvmppc_get_gpr(vcpu, 4);
+ pg_sizes = kvmppc_get_gpr(vcpu, 7);
+ start = kvmppc_get_gpr(vcpu, 8);
+ end = kvmppc_get_gpr(vcpu, 9);
+
+ do_h_rpt_invalidate_prt(pid, vcpu->arch.nested->shadow_lpid,
+ type, pg_sizes, start, end);
+
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ return RESUME_GUEST;
+}
+
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+ unsigned long id, unsigned long target,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ if (!kvm_is_radix(vcpu->kvm))
+ return H_UNSUPPORTED;
+
+ if (end < start)
+ return H_P5;
+
+ /*
+ * Partition-scoped invalidation for nested guests.
+ */
+ if (type & H_RPTI_TYPE_NESTED) {
+ if (!nesting_enabled(vcpu->kvm))
+ return H_FUNCTION;
+
+ /* Support only cores as target */
+ if (target != H_RPTI_TARGET_CMMU)
+ return H_P2;
+
+ return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes,
+ start, end);
+ }
+
+ /*
+ * Process-scoped invalidation for L1 guests.
+ */
+ do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid,
+ type, pg_sizes, start, end);
+ return H_SUCCESS;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
int yield_count;
@@ -937,11 +999,57 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
+ case H_REMOVE:
+ ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_ENTER:
+ ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_READ:
+ ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_MOD:
+ ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_REF:
+ ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PROTECT:
+ ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_BULK_REMOVE:
+ ret = kvmppc_h_bulk_remove(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+
case H_CEDE:
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -955,7 +1063,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
break;
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -971,12 +1079,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
- if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ if (list_empty(&kvm->arch.rtas_tokens))
return RESUME_HOST;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
+ idx = srcu_read_lock(&kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ srcu_read_unlock(&kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
@@ -1060,15 +1168,23 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
ret = H_HARDWARE;
break;
+ case H_RPT_INVALIDATE:
+ ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7),
+ kvmppc_get_gpr(vcpu, 8),
+ kvmppc_get_gpr(vcpu, 9));
+ break;
case H_SET_PARTITION_TABLE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_set_partition_table(vcpu);
break;
case H_ENTER_NESTED:
ret = H_FUNCTION;
- if (!nesting_enabled(vcpu->kvm))
+ if (!nesting_enabled(kvm))
break;
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
@@ -1083,12 +1199,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
case H_TLB_INVALIDATE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
case H_COPY_TOFROM_GUEST:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
case H_PAGE_INIT:
@@ -1099,7 +1215,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_IN:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ ret = kvmppc_h_svm_page_in(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1107,7 +1223,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_OUT:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ ret = kvmppc_h_svm_page_out(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1115,12 +1231,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_INIT_START:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ ret = kvmppc_h_svm_init_start(kvm);
break;
case H_SVM_INIT_DONE:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ ret = kvmppc_h_svm_init_done(kvm);
break;
case H_SVM_INIT_ABORT:
/*
@@ -1130,24 +1246,26 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
* Instead the kvm->arch.secure_guest flag is checked inside
* kvmppc_h_svm_init_abort().
*/
- ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ ret = kvmppc_h_svm_init_abort(kvm);
break;
default:
return RESUME_HOST;
}
+ WARN_ON_ONCE(ret == H_TOO_HARD);
kvmppc_set_gpr(vcpu, 3, ret);
vcpu->arch.hcall_needed = 0;
return RESUME_GUEST;
}
/*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
@@ -1178,6 +1296,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_XIRR_X:
#endif
case H_PAGE_INIT:
+ case H_RPT_INVALIDATE:
return 1;
}
@@ -1400,13 +1519,39 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
{
- /* hcall - punt to userspace */
int i;
- /* hypercall with MSR_PR has already been handled in rmode,
- * and never reaches here.
- */
+ if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1. This can only be
+ * reached by the P9 path because the old path
+ * handles this case in realmode hcall handlers.
+ */
+ if (!kvmhv_vcpu_is_radix(vcpu)) {
+ /*
+ * A guest could be running PR KVM, so this
+ * may be a PR KVM hcall. It must be reflected
+ * to the guest kernel as a sc interrupt.
+ */
+ kvmppc_core_queue_syscall(vcpu);
+ } else {
+ /*
+ * Radix guests can not run PR KVM or nested HV
+ * hash guests which might run PR KVM, so this
+ * is always a privilege fault. Send a program
+ * check to guest kernel.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ }
+ r = RESUME_GUEST;
+ break;
+ }
+ /*
+ * hcall - gather args and set exit_reason. This will next be
+ * handled by kvmppc_pseries_do_hcall which may be able to deal
+ * with it and resume guest, or may punt to userspace.
+ */
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -1419,20 +1564,102 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* We get these next two if the guest accesses a page which it thinks
* it has mapped but which is not actually present, either because
* it is for an emulated I/O device or because the corresonding
- * host page has been paged out. Any other HDSI/HISI interrupts
- * have been handled already.
+ * host page has been paged out.
+ *
+ * Any other HDSI/HISI interrupts have been handled already for P7/8
+ * guests. For POWER9 hash guests not using rmhandlers, basic hash
+ * fault handling is done here.
*/
- case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- r = RESUME_PAGE_FAULT;
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+ unsigned long vsid;
+ long err;
+
+ if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
+ r = RESUME_GUEST; /* Just retry if it's the canary */
+ break;
+ }
+
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_DR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, true);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1 || err == -2) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, err);
+ r = RESUME_GUEST;
+ }
break;
- case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ }
+ case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+ unsigned long vsid;
+ long err;
+
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
DSISR_SRR1_MATCH_64S;
- if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
- vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
- r = RESUME_PAGE_FAULT;
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+ kvmppc_core_queue_inst_storage(vcpu,
+ vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_IR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, false);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_inst_storage(vcpu, err);
+ r = RESUME_GUEST;
+ }
break;
+ }
+
/*
* This occurs if the guest executes an illegal instruction.
* If the guest debug is disabled, generate a program interrupt
@@ -1593,6 +1820,23 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
if (!xics_on_xive())
kvmppc_xics_rm_complete(vcpu, 0);
break;
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /*
+ * The H_RPT_INVALIDATE hcalls issued by nested
+ * guests for process-scoped invalidations when
+ * GTSE=0, are handled here in L0.
+ */
+ if (req == H_RPT_INVALIDATE) {
+ r = kvmppc_nested_h_rpt_invalidate(vcpu);
+ break;
+ }
+
+ r = RESUME_HOST;
+ break;
+ }
default:
r = RESUME_HOST;
break;
@@ -1654,6 +1898,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
lpcr &= ~LPCR_AIL;
if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+ /*
+ * On some POWER9s we force AIL off for radix guests to prevent
+ * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
+ * guest, which can result in Q0 translations with LPID=0 PID=PIDR to
+ * be cached, which the host TLB management does not expect.
+ */
+ if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ lpcr &= ~LPCR_AIL;
/*
* On POWER9, allow userspace to enable large decrementer for the
@@ -2233,7 +2485,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*/
static int threads_per_vcore(struct kvm *kvm)
{
- if (kvm->arch.threads_indep)
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return 1;
return threads_per_subcore;
}
@@ -2657,7 +2909,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
cpumask_t *cpu_in_guest;
int i;
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
if (nested) {
cpumask_set_cpu(cpu, &nested->need_tlb_flush);
cpu_in_guest = &nested->cpu_in_guest;
@@ -2671,9 +2923,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
* the other side is the first smp_mb() in kvmppc_run_core().
*/
smp_mb();
- for (i = 0; i < threads_per_core; ++i)
- if (cpumask_test_cpu(cpu + i, cpu_in_guest))
- smp_call_function_single(cpu + i, do_nothing, NULL, 1);
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step())
+ if (cpumask_test_cpu(i, cpu_in_guest))
+ smp_call_function_single(i, do_nothing, NULL, 1);
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
@@ -2704,8 +2957,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
*/
if (prev_cpu != pcpu) {
if (prev_cpu >= 0 &&
- cpu_first_thread_sibling(prev_cpu) !=
- cpu_first_thread_sibling(pcpu))
+ cpu_first_tlb_thread_sibling(prev_cpu) !=
+ cpu_first_tlb_thread_sibling(pcpu))
radix_flush_cpu(kvm, prev_cpu, vcpu);
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
@@ -2967,9 +3220,6 @@ static void prepare_threads(struct kvmppc_vcore *vc)
for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
- else if (no_mixing_hpt_and_radix &&
- kvm_is_radix(vc->kvm) != radix_enabled())
- vcpu->arch.ret = -EINVAL;
else if (vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending)
@@ -3176,6 +3426,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int trap;
bool is_power8;
+ if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+ return;
+
/*
* Remove from the list any threads that have a signal pending
* or need a VPA update done
@@ -3203,9 +3456,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
- * On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host machine where the
- * CPU threads may not be in different MMU modes.
*/
if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
@@ -3230,18 +3480,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
collect_piggybacks(&core_info, target_threads);
/*
- * On radix, arrange for TLB flushing if necessary.
- * This has to be done before disabling interrupts since
- * it uses smp_call_function().
- */
- pcpu = smp_processor_id();
- if (kvm_is_radix(vc->kvm)) {
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, core_info.vc[sub])
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
- }
-
- /*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
* and return without going into the guest(s).
@@ -3273,8 +3511,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cmd_bit = stat_bit = 0;
split = core_info.n_subcores;
sip = NULL;
- is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
- && !cpu_has_feature(CPU_FTR_ARCH_300);
+ is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
if (split > 1) {
sip = &split_info;
@@ -3478,184 +3715,113 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
-/*
- * Load up hypervisor-mode registers on P9.
- */
-static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
- unsigned long lpcr)
+static void load_spr_state(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- s64 hdec;
- u64 tb, purr, spurr;
- int trap;
- unsigned long host_hfscr = mfspr(SPRN_HFSCR);
- unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
- unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
- unsigned long host_psscr = mfspr(SPRN_PSSCR);
- unsigned long host_pidr = mfspr(SPRN_PID);
- unsigned long host_dawr1 = 0;
- unsigned long host_dawrx1 = 0;
-
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- host_dawr1 = mfspr(SPRN_DAWR1);
- host_dawrx1 = mfspr(SPRN_DAWRX1);
- }
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ mtspr(SPRN_WORT, vcpu->arch.wort);
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
/*
- * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
- * so set HDICE before writing HDEC.
+ * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+ * clear (or hstate set appropriately to catch those registers
+ * being clobbered if we take a MCE or SRESET), so those are done
+ * later.
*/
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
- isync();
-
- hdec = time_limit - mftb();
- if (hdec < 0) {
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
- isync();
- return BOOK3S_INTERRUPT_HV_DECREMENTER;
- }
- mtspr(SPRN_HDEC, hdec);
-
- if (vc->tb_offset) {
- u64 new_tb = mftb() + vc->tb_offset;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = vc->tb_offset;
- }
-
- if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
- mtspr(SPRN_DPDES, vc->dpdes);
- mtspr(SPRN_VTB, vc->vtb);
-
- local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
- local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, vcpu->arch.purr);
- mtspr(SPRN_SPURR, vcpu->arch.spurr);
-
- if (dawr_enabled()) {
- mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
- mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
- mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
- }
- }
- mtspr(SPRN_CIABR, vcpu->arch.ciabr);
- mtspr(SPRN_IC, vcpu->arch.ic);
- mtspr(SPRN_PID, vcpu->arch.pid);
-
- mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
-
- mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
- mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
- mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
- mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
- mtspr(SPRN_AMOR, ~0UL);
-
- mtspr(SPRN_LPCR, lpcr);
- isync();
-
- kvmppc_xive_push_vcpu(vcpu);
-
- mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
- mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
- trap = __kvmhv_vcpu_entry_p9(vcpu);
-
- /* Advance host PURR/SPURR by the amount used by guest */
- purr = mfspr(SPRN_PURR);
- spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
- purr - vcpu->arch.purr);
- mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
- spurr - vcpu->arch.spurr);
- vcpu->arch.purr = purr;
- vcpu->arch.spurr = spurr;
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+}
- vcpu->arch.ic = mfspr(SPRN_IC);
- vcpu->arch.pid = mfspr(SPRN_PID);
- vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
- vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
- vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
- vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ vcpu->arch.wort = mfspr(SPRN_WORT);
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+}
- /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
- mtspr(SPRN_PSSCR, host_psscr |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, host_hfscr);
- mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR0, host_dawr0);
- mtspr(SPRN_DAWRX0, host_dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, host_dawr1);
- mtspr(SPRN_DAWRX1, host_dawrx1);
- }
- mtspr(SPRN_PID, host_pidr);
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+ unsigned long dscr;
+ unsigned long tidr;
+ unsigned long iamr;
+ unsigned long amr;
+ unsigned long fscr;
+};
- /*
- * Since this is radix, do a eieio; tlbsync; ptesync sequence in
- * case we interrupted the guest between a tlbie and a ptesync.
- */
- asm volatile("eieio; tlbsync; ptesync");
+static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+ host_os_sprs->dscr = mfspr(SPRN_DSCR);
+ host_os_sprs->tidr = mfspr(SPRN_TIDR);
+ host_os_sprs->iamr = mfspr(SPRN_IAMR);
+ host_os_sprs->amr = mfspr(SPRN_AMR);
+ host_os_sprs->fscr = mfspr(SPRN_FSCR);
+}
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- asm volatile(PPC_CP_ABORT);
+/* vcpu guest regs must already be saved */
+static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ mtspr(SPRN_PSPB, 0);
+ mtspr(SPRN_WORT, 0);
+ mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
- isync();
+ mtspr(SPRN_DSCR, host_os_sprs->dscr);
+ mtspr(SPRN_TIDR, host_os_sprs->tidr);
+ mtspr(SPRN_IAMR, host_os_sprs->iamr);
- vc->dpdes = mfspr(SPRN_DPDES);
- vc->vtb = mfspr(SPRN_VTB);
- mtspr(SPRN_DPDES, 0);
- if (vc->pcr)
- mtspr(SPRN_PCR, PCR_MASK);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_os_sprs->amr);
- if (vc->tb_offset_applied) {
- u64 new_tb = mftb() - vc->tb_offset_applied;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = 0;
- }
+ if (host_os_sprs->fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, host_os_sprs->fscr);
- mtspr(SPRN_HDEC, 0x7fffffff);
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+ /* Save guest CTRL register, set runlatch to 1 */
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 1);
+}
- return trap;
+static inline bool hcall_is_xics(unsigned long req)
+{
+ return req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
}
/*
- * Virtual-mode guest entry for POWER9 and later when the host and
- * guest are both using the radix MMU. The LPIDR has already been set.
+ * Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- unsigned long host_dscr = mfspr(SPRN_DSCR);
- unsigned long host_tidr = mfspr(SPRN_TIDR);
- unsigned long host_iamr = mfspr(SPRN_IAMR);
- unsigned long host_amr = mfspr(SPRN_AMR);
- unsigned long host_fscr = mfspr(SPRN_FSCR);
+ struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb;
int trap, save_pmu;
+ WARN_ON_ONCE(vcpu->arch.ceded);
+
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 0)
@@ -3664,7 +3830,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (local_paca->kvm_hstate.dec_expires < time_limit)
time_limit = local_paca->kvm_hstate.dec_expires;
- vcpu->arch.ceded = 0;
+ save_p9_host_os_sprs(&host_os_sprs);
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
@@ -3693,24 +3859,20 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
- mtspr(SPRN_DSCR, vcpu->arch.dscr);
- mtspr(SPRN_IAMR, vcpu->arch.iamr);
- mtspr(SPRN_PSPB, vcpu->arch.pspb);
- mtspr(SPRN_FSCR, vcpu->arch.fscr);
- mtspr(SPRN_TAR, vcpu->arch.tar);
- mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
- mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
- mtspr(SPRN_BESCR, vcpu->arch.bescr);
- mtspr(SPRN_WORT, vcpu->arch.wort);
- mtspr(SPRN_TIDR, vcpu->arch.tid);
- mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
- mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
- mtspr(SPRN_AMR, vcpu->arch.amr);
- mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+ load_spr_state(vcpu);
+ /*
+ * When setting DEC, we must always deal with irq_work_raise via NMI vs
+ * setting DEC. The problem occurs right as we switch into guest mode
+ * if a NMI hits and sets pending work and sets DEC, then that will
+ * apply to the guest and not bring us back to the host.
+ *
+ * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
+ * example) and set HDEC to 1? That wouldn't solve the nested hv
+ * case which needs to abort the hcall or zero the time limit.
+ *
+ * XXX: Another day's problem.
+ */
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
@@ -3718,7 +3880,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
- * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ * this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
@@ -3738,6 +3900,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
hvregs.vcpu_token = vcpu->vcpu_id;
}
hvregs.hdec_expiry = time_limit;
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
@@ -3750,15 +3914,41 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
- kvmppc_nested_cede(vcpu);
+ kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
- trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ kvmppc_xive_push_vcpu(vcpu);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /* H_CEDE has to be handled now, not later */
+ if (req == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+
+ /* XICS hcalls must be handled before xive is pulled */
+ } else if (hcall_is_xics(req)) {
+ int ret;
+
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ if (ret != H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ }
+ kvmppc_xive_pull_vcpu(vcpu);
+
+ if (kvm_is_radix(vcpu->kvm))
+ vcpu->arch.slb_max = 0;
}
- vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
@@ -3766,36 +3956,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
- /* Save guest CTRL register, set runlatch to 1 */
- vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
- vcpu->arch.iamr = mfspr(SPRN_IAMR);
- vcpu->arch.pspb = mfspr(SPRN_PSPB);
- vcpu->arch.fscr = mfspr(SPRN_FSCR);
- vcpu->arch.tar = mfspr(SPRN_TAR);
- vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
- vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
- vcpu->arch.bescr = mfspr(SPRN_BESCR);
- vcpu->arch.wort = mfspr(SPRN_WORT);
- vcpu->arch.tid = mfspr(SPRN_TIDR);
- vcpu->arch.amr = mfspr(SPRN_AMR);
- vcpu->arch.uamor = mfspr(SPRN_UAMOR);
- vcpu->arch.dscr = mfspr(SPRN_DSCR);
+ store_spr_state(vcpu);
- mtspr(SPRN_PSPB, 0);
- mtspr(SPRN_WORT, 0);
- mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_DSCR, host_dscr);
- mtspr(SPRN_TIDR, host_tidr);
- mtspr(SPRN_IAMR, host_iamr);
-
- if (host_amr != vcpu->arch.amr)
- mtspr(SPRN_AMR, host_amr);
-
- if (host_fscr != vcpu->arch.fscr)
- mtspr(SPRN_FSCR, host_fscr);
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
@@ -3825,6 +3989,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vc->in_guest = 0;
mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
@@ -3925,7 +4092,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
cur = start_poll = ktime_get();
if (vc->halt_poll_ns) {
ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
- ++vc->runner->stat.halt_attempted_poll;
+ ++vc->runner->stat.generic.halt_attempted_poll;
vc->vcore_state = VCORE_POLLING;
spin_unlock(&vc->lock);
@@ -3942,7 +4109,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_INACTIVE;
if (!do_sleep) {
- ++vc->runner->stat.halt_successful_poll;
+ ++vc->runner->stat.generic.halt_successful_poll;
goto out;
}
}
@@ -3954,7 +4121,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
- ++vc->runner->stat.halt_successful_poll;
+ ++vc->runner->stat.generic.halt_successful_poll;
goto out;
}
@@ -3981,13 +4148,13 @@ out:
ktime_to_ns(cur) - ktime_to_ns(start_wait);
/* Attribute failed poll time */
if (vc->halt_poll_ns)
- vc->runner->stat.halt_poll_fail_ns +=
+ vc->runner->stat.generic.halt_poll_fail_ns +=
ktime_to_ns(start_wait) -
ktime_to_ns(start_poll);
} else {
/* Attribute successful poll time */
if (vc->halt_poll_ns)
- vc->runner->stat.halt_poll_success_ns +=
+ vc->runner->stat.generic.halt_poll_success_ns +=
ktime_to_ns(cur) -
ktime_to_ns(start_poll);
}
@@ -4014,7 +4181,6 @@ out:
/*
* This never fails for a radix guest, as none of the operations it does
* for a radix guest can fail or have a way to report failure.
- * kvmhv_run_single_vcpu() relies on this fact.
*/
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
@@ -4170,7 +4336,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
{
struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
- int srcu_idx, lpid;
+ int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4193,8 +4359,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->runner = vcpu;
/* See if the MMU is ready to go */
- if (!kvm->arch.mmu_ready)
- kvmhv_setup_mmu(vcpu);
+ if (!kvm->arch.mmu_ready) {
+ r = kvmhv_setup_mmu(vcpu);
+ if (r) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ return r;
+ }
+ }
if (need_resched())
cond_resched();
@@ -4207,7 +4380,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ if (kvm_is_radix(kvm))
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
@@ -4244,13 +4418,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
- mtspr(SPRN_LPID, lpid);
- isync();
- kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
- }
-
guest_enter_irqoff();
srcu_idx = srcu_read_lock(&kvm->srcu);
@@ -4269,11 +4436,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
srcu_read_unlock(&kvm->srcu, srcu_idx);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- mtspr(SPRN_LPID, kvm->arch.host_lpid);
- isync();
- }
-
set_irq_happened(trap);
kvmppc_set_host_core(pcpu);
@@ -4419,19 +4581,23 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
- /*
- * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
- * path, which also handles hash and dependent threads mode.
- */
- if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
- !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
r = kvmppc_run_vcpu(vcpu);
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * These should have been caught reflected
+ * into the guest by now. Final sanity check:
+ * don't allow userspace to execute hcalls in
+ * the hypervisor.
+ */
+ r = RESUME_GUEST;
+ continue;
+ }
trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
trace_kvm_hcall_exit(vcpu, r);
@@ -4460,6 +4626,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&kvm->arch.vcpus_running);
+
+ srr_regs_clobbered();
+
return r;
}
@@ -4758,8 +4927,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
mmap_read_lock(kvm->mm);
- vma = find_vma(kvm->mm, hva);
- if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+ vma = vma_lookup(kvm->mm, hva);
+ if (!vma || (vma->vm_flags & VM_IO))
goto up_out;
psize = vma_kernel_pagesize(vma);
@@ -5038,18 +5207,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
- * On POWER9, we only need to do this if the "indep_threads_mode"
- * module parameter has been set to N.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
- pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
- kvm->arch.threads_indep = true;
- } else {
- kvm->arch.threads_indep = indep_threads_mode;
- }
- }
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
@@ -5090,7 +5249,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
debugfs_remove_recursive(kvm->arch.debugfs_dir);
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
@@ -5511,7 +5670,9 @@ static int kvmhv_enable_nested(struct kvm *kvm)
{
if (!nested)
return -EPERM;
- if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+ if (!radix_enabled())
return -ENODEV;
/* kvm == NULL means the caller is testing if the capability exists */
@@ -5674,11 +5835,25 @@ static int kvmhv_enable_dawr1(struct kvm *kvm)
static bool kvmppc_hash_v3_possible(void)
{
- if (radix_enabled() && no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return false;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
return false;
- return cpu_has_feature(CPU_FTR_ARCH_300) &&
- cpu_has_feature(CPU_FTR_HVMODE);
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ if (radix_enabled()) {
+ unsigned int pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) == PVR_POWER9 &&
+ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+ return false;
+ }
+
+ return true;
}
static struct kvmppc_ops kvm_ops_hv = {
@@ -5822,18 +5997,6 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
- /*
- * POWER9 chips before version 2.02 can't have some threads in
- * HPT mode and some in radix mode on the same core.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- unsigned int pvr = mfspr(SPRN_PVR);
- if ((pvr >> 16) == PVR_POWER9 &&
- (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
- ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
- no_mixing_hpt_and_radix = true;
- }
-
r = kvmppc_uvmem_init();
if (r < 0)
pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);