aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm/book3s_hv.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--arch/powerpc/kvm/book3s_hv.c146
1 files changed, 105 insertions, 41 deletions
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 57d0835e56fd..6ba68dd6190b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -249,6 +249,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
/*
* We use the vcpu_load/put functions to measure stolen time.
+ *
* Stolen time is counted as time when either the vcpu is able to
* run as part of a virtual core, but the task running the vcore
* is preempted or sleeping, or when the vcpu needs something done
@@ -278,6 +279,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* lock. The stolen times are measured in units of timebase ticks.
* (Note that the != TB_NIL checks below are purely defensive;
* they should never fail.)
+ *
+ * The POWER9 path is simpler, one vcpu per virtual core so the
+ * former case does not exist. If a vcpu is preempted when it is
+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible
+ * state in the P9 path.
*/
static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
@@ -311,8 +318,14 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
unsigned long flags;
u64 now;
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (vcpu->arch.busy_preempt != TB_NIL) {
+ WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST);
+ vc->stolen_tb += mftb() - vcpu->arch.busy_preempt;
+ vcpu->arch.busy_preempt = TB_NIL;
+ }
return;
+ }
now = mftb();
@@ -340,8 +353,21 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
unsigned long flags;
u64 now;
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * In the P9 path, RUNNABLE is not preemptible
+ * (nor takes host interrupts)
+ */
+ WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE);
+ /*
+ * Account stolen time when preempted while the vcpu task is
+ * running in the kernel (but not in qemu, which is INACTIVE).
+ */
+ if (task_is_running(current) &&
+ vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+ vcpu->arch.busy_preempt = mftb();
return;
+ }
now = mftb();
@@ -707,16 +733,15 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
}
static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct lppaca *vpa,
unsigned int pcpu, u64 now,
unsigned long stolen)
{
struct dtl_entry *dt;
- struct lppaca *vpa;
dt = vcpu->arch.dtl_ptr;
- vpa = vcpu->arch.vpa.pinned_addr;
- if (!dt || !vpa)
+ if (!dt)
return;
dt->dispatch_reason = 7;
@@ -737,17 +762,23 @@ static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
/* order writing *dt vs. writing vpa->dtl_idx */
smp_wmb();
vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
- vcpu->arch.dtl.dirty = true;
+
+ /* vcpu->arch.dtl.dirty is set by the caller */
}
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
- struct kvmppc_vcore *vc)
+static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
{
+ struct lppaca *vpa;
unsigned long stolen;
unsigned long core_stolen;
u64 now;
unsigned long flags;
+ vpa = vcpu->arch.vpa.pinned_addr;
+ if (!vpa)
+ return;
+
now = mftb();
core_stolen = vcore_stolen_time(vc, now);
@@ -758,7 +789,34 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.busy_stolen = 0;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
- __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
+ vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen);
+
+ __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + vc->tb_offset, stolen);
+
+ vcpu->arch.vpa.dirty = true;
+}
+
+static void kvmppc_update_vpa_dispatch_p9(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc,
+ u64 now)
+{
+ struct lppaca *vpa;
+ unsigned long stolen;
+ unsigned long stolen_delta;
+
+ vpa = vcpu->arch.vpa.pinned_addr;
+ if (!vpa)
+ return;
+
+ stolen = vc->stolen_tb;
+ stolen_delta = stolen - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = stolen;
+
+ vpa->enqueue_dispatch_tb = cpu_to_be64(stolen);
+
+ __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now, stolen_delta);
+
+ vcpu->arch.vpa.dirty = true;
}
/* See if there is a doorbell interrupt pending for a vcpu */
@@ -2517,10 +2575,24 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ {
/* round up to multiple of 2^24 */
- vcpu->arch.vcore->tb_offset =
- ALIGN(set_reg_val(id, *val), 1UL << 24);
+ u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24);
+
+ /*
+ * Now that we know the timebase offset, update the
+ * decrementer expiry with a guest timebase value. If
+ * the userspace does not set DEC_EXPIRY, this ensures
+ * a migrated vcpu at least starts with an expired
+ * decrementer, which is better than a large one that
+ * causes a hang.
+ */
+ if (!vcpu->arch.dec_expires && tb_offset)
+ vcpu->arch.dec_expires = get_tb() + tb_offset;
+
+ vcpu->arch.vcore->tb_offset = tb_offset;
break;
+ }
case KVM_REG_PPC_LPCR:
kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
break;
@@ -3800,7 +3872,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* kvmppc_core_prepare_to_enter.
*/
kvmppc_start_thread(vcpu, pvc);
- kvmppc_create_dtl_entry(vcpu, pvc);
+ kvmppc_update_vpa_dispatch(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
if (!vcpu->arch.ptid)
thr0_done = true;
@@ -3840,23 +3912,17 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
for (sub = 0; sub < core_info.n_subcores; ++sub)
spin_unlock(&core_info.vc[sub]->lock);
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
+ guest_state_enter_irqoff();
this_cpu_disable_ftrace();
- /*
- * Interrupts will be enabled once we get into the guest,
- * so tell lockdep that we're about to enable interrupts.
- */
- trace_hardirqs_on();
-
trap = __kvmppc_vcore_entry();
- trace_hardirqs_off();
-
this_cpu_enable_ftrace();
+ guest_state_exit_irqoff();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
@@ -3891,11 +3957,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_set_host_core(pcpu);
- context_tracking_guest_exit();
if (!vtime_accounting_enabled_this_cpu()) {
local_irq_enable();
/*
- * Service IRQs here before vtime_account_guest_exit() so any
+ * Service IRQs here before guest_timing_exit_irqoff() so any
* ticks that occurred while running the guest are accounted to
* the guest. If vtime accounting is enabled, accounting uses
* TB rather than ticks, so it can be done without enabling
@@ -3904,7 +3969,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
local_irq_disable();
}
- vtime_account_guest_exit();
+ guest_timing_exit_irqoff();
local_irq_enable();
@@ -4404,7 +4469,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
if ((vc->vcore_state == VCORE_PIGGYBACK ||
vc->vcore_state == VCORE_RUNNING) &&
!VCORE_IS_EXITING(vc)) {
- kvmppc_create_dtl_entry(vcpu, vc);
+ kvmppc_update_vpa_dispatch(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
@@ -4520,7 +4585,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc = vcpu->arch.vcore;
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
- vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
/* See if the MMU is ready to go */
@@ -4547,6 +4611,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
/* flags save not required, but irq_pmu has no disable/enable API */
powerpc_local_irq_pmu_save(flags);
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+
if (signal_pending(current))
goto sigpend;
if (need_resched() || !kvm->arch.mmu_ready)
@@ -4591,47 +4657,44 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
tb = mftb();
- __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
+ kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + vc->tb_offset);
trace_kvm_guest_enter(vcpu);
- guest_enter_irqoff();
+ guest_timing_enter_irqoff();
srcu_idx = srcu_read_lock(&kvm->srcu);
+ guest_state_enter_irqoff();
this_cpu_disable_ftrace();
- /* Tell lockdep that we're about to enable interrupts */
- trace_hardirqs_on();
-
trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
vcpu->arch.trap = trap;
- trace_hardirqs_off();
-
this_cpu_enable_ftrace();
+ guest_state_exit_irqoff();
srcu_read_unlock(&kvm->srcu, srcu_idx);
set_irq_happened(trap);
- context_tracking_guest_exit();
+ vcpu->cpu = -1;
+ vcpu->arch.thread_cpu = -1;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
if (!vtime_accounting_enabled_this_cpu()) {
- local_irq_enable();
+ powerpc_local_irq_pmu_restore(flags);
/*
- * Service IRQs here before vtime_account_guest_exit() so any
+ * Service IRQs here before guest_timing_exit_irqoff() so any
* ticks that occurred while running the guest are accounted to
* the guest. If vtime accounting is enabled, accounting uses
* TB rather than ticks, so it can be done without enabling
* interrupts here, which has the problem that it accounts
* interrupt processing overhead to the host.
*/
- local_irq_disable();
+ powerpc_local_irq_pmu_save(flags);
}
- vtime_account_guest_exit();
-
- vcpu->cpu = -1;
- vcpu->arch.thread_cpu = -1;
+ guest_timing_exit_irqoff();
powerpc_local_irq_pmu_restore(flags);
@@ -4694,6 +4757,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
out:
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
powerpc_local_irq_pmu_restore(flags);
preempt_enable();
goto done;