diff options
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 7 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_32_mmu.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio.c | 77 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio_hv.c | 42 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 56 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 4 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 77 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_tm.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xics.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.c | 64 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive_native.c | 27 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/emulate.c | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/emulate_loadstore.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 22 |
20 files changed, 275 insertions, 150 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f53997a8ca62..711fca9bc6f0 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM_BOOK3S_32_HANDLER config KVM_BOOK3S_64_HANDLER bool select KVM_BOOK3S_HANDLER + select PPC_DAWR_FORCE_ENABLE config KVM_BOOK3S_PR_POSSIBLE bool @@ -183,9 +184,9 @@ config KVM_MPIC select HAVE_KVM_MSI help Enable support for emulating MPIC devices inside the - host kernel, rather than relying on userspace to emulate. - Currently, support is limited to certain versions of - Freescale's MPIC implementation. + host kernel, rather than relying on userspace to emulate. + Currently, support is limited to certain versions of + Freescale's MPIC implementation. config KVM_XICS bool "KVM in-kernel XICS emulation" diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9524d92bc45d..d7fcdfa7fee4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1083,9 +1083,11 @@ static int kvmppc_book3s_init(void) if (xics_on_xive()) { kvmppc_xive_init_module(); kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS); - kvmppc_xive_native_init_module(); - kvm_register_device_ops(&kvm_xive_native_ops, - KVM_DEV_TYPE_XIVE); + if (kvmppc_xive_native_supported()) { + kvmppc_xive_native_init_module(); + kvm_register_device_ops(&kvm_xive_native_ops, + KVM_DEV_TYPE_XIVE); + } } else #endif kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 653936177857..18f244aad7aa 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -239,6 +239,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, case 2: case 6: pte->may_write = true; + /* fall through */ case 3: case 5: case 7: diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 08b2dfbc5305..2d415c36a61d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -361,12 +361,6 @@ static void kvmppc_pte_free(pte_t *ptep) kmem_cache_free(kvm_pte_cache, ptep); } -/* Like pmd_huge() and pmd_large(), but works regardless of config options */ -static inline int pmd_is_leaf(pmd_t pmd) -{ - return !!(pmd_val(pmd) & _PAGE_PTE); -} - static pmd_t *kvmppc_pmd_alloc(void) { return kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL); @@ -487,7 +481,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud, for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) { if (!pud_present(*p)) continue; - if (pud_huge(*p)) { + if (pud_is_leaf(*p)) { pud_clear(p); } else { pmd_t *pmd; @@ -586,7 +580,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, new_pud = pud_alloc_one(kvm->mm, gpa); pmd = NULL; - if (pud && pud_present(*pud) && !pud_huge(*pud)) + if (pud && pud_present(*pud) && !pud_is_leaf(*pud)) pmd = pmd_offset(pud, gpa); else if (level <= 1) new_pmd = kvmppc_pmd_alloc(); @@ -609,7 +603,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, new_pud = NULL; } pud = pud_offset(pgd, gpa); - if (pud_huge(*pud)) { + if (pud_is_leaf(*pud)) { unsigned long hgpa = gpa & PUD_MASK; /* Check if we raced and someone else has set the same thing */ diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 5bf05cc774e2..5834db0a54c6 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -19,6 +19,7 @@ #include <linux/anon_inodes.h> #include <linux/iommu.h> #include <linux/file.h> +#include <linux/mm.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -45,43 +46,6 @@ static unsigned long kvmppc_stt_pages(unsigned long tce_pages) return tce_pages + ALIGN(stt_bytes, PAGE_SIZE) / PAGE_SIZE; } -static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) -{ - long ret = 0; - - if (!current || !current->mm) - return ret; /* process exited */ - - down_write(¤t->mm->mmap_sem); - - if (inc) { - unsigned long locked, lock_limit; - - locked = current->mm->locked_vm + stt_pages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) - ret = -ENOMEM; - else - current->mm->locked_vm += stt_pages; - } else { - if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm)) - stt_pages = current->mm->locked_vm; - - current->mm->locked_vm -= stt_pages; - } - - pr_debug("[%d] RLIMIT_MEMLOCK KVM %c%ld %ld/%ld%s\n", current->pid, - inc ? '+' : '-', - stt_pages << PAGE_SHIFT, - current->mm->locked_vm << PAGE_SHIFT, - rlimit(RLIMIT_MEMLOCK), - ret ? " - exceeded" : ""); - - up_write(¤t->mm->mmap_sem); - - return ret; -} - static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) { struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, @@ -291,7 +255,7 @@ static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) kvm_put_kvm(stt->kvm); - kvmppc_account_memlimit( + account_locked_vm(current->mm, kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false); call_rcu(&stt->rcu, release_spapr_tce_table); @@ -316,7 +280,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return -EINVAL; npages = kvmppc_tce_pages(size); - ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); + ret = account_locked_vm(current->mm, kvmppc_stt_pages(npages), true); if (ret) return ret; @@ -362,7 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, kfree(stt); fail_acct: - kvmppc_account_memlimit(kvmppc_stt_pages(npages), false); + account_locked_vm(current->mm, kvmppc_stt_pages(npages), false); return ret; } @@ -452,7 +416,7 @@ static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl, unsigned long hpa = 0; enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg(mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir); } static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, @@ -483,7 +447,8 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, unsigned long hpa = 0; long ret; - if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir))) + if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, + &dir))) return H_TOO_HARD; if (dir == DMA_NONE) @@ -491,7 +456,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm, ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret != H_SUCCESS) - iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -537,7 +502,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (mm_iommu_mapped_inc(mem)) return H_TOO_HARD; - ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir); if (WARN_ON_ONCE(ret)) { mm_iommu_mapped_dec(mem); return H_TOO_HARD; @@ -615,6 +580,8 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); + iommu_tce_kill(stit->tbl, entry, 1); + if (ret != H_SUCCESS) { kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); goto unlock_exit; @@ -692,12 +659,14 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ if (get_user(tce, tces + i)) { ret = H_TOO_HARD; - goto unlock_exit; + goto invalidate_exit; } tce = be64_to_cpu(tce); - if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) - return H_PARAMETER; + if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) { + ret = H_PARAMETER; + goto invalidate_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, @@ -707,13 +676,17 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) { kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); - goto unlock_exit; + goto invalidate_exit; } } kvmppc_tce_put(stt, entry + i, tce); } +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill(stit->tbl, entry, npages); + unlock_exit: srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -752,7 +725,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - return ret; + goto invalidate_exit; WARN_ON_ONCE(1); kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry); @@ -762,6 +735,10 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); - return H_SUCCESS; +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages); + + return ret; } EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index f50bbeedfc66..ab6eeb8e753e 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -218,13 +218,14 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, return H_SUCCESS; } -static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, +static long iommu_tce_xchg_no_kill_rm(struct mm_struct *mm, + struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction) { long ret; - ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction); + ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, true); if (!ret && ((*direction == DMA_FROM_DEVICE) || (*direction == DMA_BIDIRECTIONAL))) { @@ -240,13 +241,20 @@ static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl, return ret; } +extern void iommu_tce_kill_rm(struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + if (tbl->it_ops->tce_kill) + tbl->it_ops->tce_kill(tbl, entry, pages, true); +} + static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl, unsigned long entry) { unsigned long hpa = 0; enum dma_data_direction dir = DMA_NONE; - iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); } static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, @@ -278,7 +286,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, unsigned long hpa = 0; long ret; - if (iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir)) + if (iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir)) /* * real mode xchg can fail if struct page crosses * a page boundary @@ -290,7 +298,7 @@ static long kvmppc_rm_tce_iommu_do_unmap(struct kvm *kvm, ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); if (ret) - iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); return ret; } @@ -336,7 +344,7 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) return H_TOO_HARD; - ret = iommu_tce_xchg_rm(kvm->mm, tbl, entry, &hpa, &dir); + ret = iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir); if (ret) { mm_iommu_mapped_dec(mem); /* @@ -417,6 +425,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, stit->tbl, entry, ua, dir); + iommu_tce_kill_rm(stit->tbl, entry, 1); + if (ret != H_SUCCESS) { kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); return ret; @@ -556,8 +566,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) - return H_PARAMETER; + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + ret = H_PARAMETER; + goto invalidate_exit; + } list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt, @@ -567,13 +579,17 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) { kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); - goto unlock_exit; + goto invalidate_exit; } } kvmppc_rm_tce_put(stt, entry + i, tce); } +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill_rm(stit->tbl, entry, npages); + unlock_exit: if (rmap) unlock_rmap(rmap); @@ -616,7 +632,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, continue; if (ret == H_TOO_HARD) - return ret; + goto invalidate_exit; WARN_ON_ONCE_RM(1); kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry); @@ -626,7 +642,11 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value); - return H_SUCCESS; +invalidate_exit: + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) + iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages); + + return ret; } /* This can be called in either virtual mode or real mode */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 76b1801aa44a..efd8f93bc9dc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1678,7 +1678,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.pspb); break; case KVM_REG_PPC_DPDES: - *val = get_reg_val(id, vcpu->arch.vcore->dpdes); + /* + * On POWER9, where we are emulating msgsndp etc., + * we return 1 bit for each vcpu, which can come from + * either vcore->dpdes or doorbell_request. + * On POWER8, doorbell_request is 0. + */ + *val = get_reg_val(id, vcpu->arch.vcore->dpdes | + vcpu->arch.doorbell_request); break; case KVM_REG_PPC_VTB: *val = get_reg_val(id, vcpu->arch.vcore->vtb); @@ -2860,7 +2867,7 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) if (!spin_trylock(&pvc->lock)) continue; prepare_threads(pvc); - if (!pvc->n_runnable) { + if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) { list_del_init(&pvc->preempt_list); if (pvc->runner == NULL) { pvc->vcore_state = VCORE_INACTIVE; @@ -2881,15 +2888,20 @@ static void collect_piggybacks(struct core_info *cip, int target_threads) spin_unlock(&lp->lock); } -static bool recheck_signals(struct core_info *cip) +static bool recheck_signals_and_mmu(struct core_info *cip) { int sub, i; struct kvm_vcpu *vcpu; + struct kvmppc_vcore *vc; - for (sub = 0; sub < cip->n_subcores; ++sub) - for_each_runnable_thread(i, vcpu, cip->vc[sub]) + for (sub = 0; sub < cip->n_subcores; ++sub) { + vc = cip->vc[sub]; + if (!vc->kvm->arch.mmu_ready) + return true; + for_each_runnable_thread(i, vcpu, vc) if (signal_pending(vcpu->arch.run_task)) return true; + } return false; } @@ -3119,7 +3131,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) local_irq_disable(); hard_irq_disable(); if (lazy_irq_pending() || need_resched() || - recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) { + recheck_signals_and_mmu(&core_info)) { local_irq_enable(); vc->vcore_state = VCORE_INACTIVE; /* Unlock all except the primary vcore */ @@ -3569,9 +3581,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb()); if (kvmhv_on_pseries()) { + /* + * We need to save and restore the guest visible part of the + * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor + * doesn't do this for us. Note only required if pseries since + * this is done in kvmhv_load_hv_regs_and_go() below otherwise. + */ + unsigned long host_psscr; /* call our hypervisor to load up HV regs and go */ struct hv_guest_state hvregs; + host_psscr = mfspr(SPRN_PSSCR_PR); + mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr); kvmhv_save_hv_regs(vcpu, &hvregs); hvregs.lpcr = lpcr; vcpu->arch.regs.msr = vcpu->arch.shregs.msr; @@ -3590,6 +3611,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.shregs.msr = vcpu->arch.regs.msr; vcpu->arch.shregs.dar = mfspr(SPRN_DAR); vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR); + mtspr(SPRN_PSSCR_PR, host_psscr); /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested && @@ -3603,6 +3626,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.slb_max = 0; dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; tb = mftb(); vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; @@ -3652,6 +3677,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.vpa.dirty = 1; save_pmu = lp->pmcregs_in_use; } + /* Must save pmu if this guest is capable of running nested guests */ + save_pmu |= nesting_enabled(vcpu->kvm); kvmhv_save_guest_pmu(vcpu, save_pmu); @@ -4122,8 +4149,15 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, preempt_enable(); - /* cancel pending decrementer exception if DEC is now positive */ - if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) + /* + * cancel pending decrementer exception if DEC is now positive, or if + * entering a nested guest in which case the decrementer is now owned + * by L2 and the L1 decrementer is provided in hdec_expires + */ + if (kvmppc_core_pending_dec(vcpu) && + ((get_tb() < vcpu->arch.dec_expires) || + (trap == BOOK3S_INTERRUPT_SYSCALL && + kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) kvmppc_core_dequeue_dec(vcpu); trace_kvm_guest_exit(vcpu); @@ -5440,6 +5474,12 @@ static int kvmppc_radix_possible(void) static int kvmppc_book3s_init_hv(void) { int r; + + if (!tlbie_capable) { + pr_err("KVM-HV: Host does not support TLBIE\n"); + return -ENODEV; + } + /* * FIXME!! Do we need to check on all cpus ? */ diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index cb05ccc8bc6a..7c1909657b55 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -820,6 +820,8 @@ static void flush_guest_tlb(struct kvm *kvm) : : "r" (rb), "i" (1), "i" (1), "i" (0), "r" (0) : "memory"); } + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } else { for (set = 0; set < kvm->arch.tlb_sets; ++set) { /* R=0 PRS=0 RIC=0 */ @@ -828,9 +830,9 @@ static void flush_guest_tlb(struct kvm *kvm) "r" (0) : "memory"); rb += PPC_BIT(51); /* increment set number */ } + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 735e0ac6f5b2..fff90f2c3de2 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -398,7 +398,7 @@ static void kvmhv_flush_lpid(unsigned int lpid) long rc; if (!kvmhv_on_pseries()) { - radix__flush_tlb_lpid(lpid); + radix__flush_all_lpid(lpid); return; } @@ -411,7 +411,7 @@ static void kvmhv_flush_lpid(unsigned int lpid) void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1) { if (!kvmhv_on_pseries()) { - mmu_partition_table_set_entry(lpid, dw0, dw1); + mmu_partition_table_set_entry(lpid, dw0, dw1, true); return; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 63e0ce91e29d..7186c65c61c9 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -99,7 +99,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, } else { rev->forw = rev->back = pte_index; *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | - pte_index | KVMPPC_RMAP_PRESENT; + pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT; } unlock_rmap(rmap); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 337e64468d78..9a05b0d932ef 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -29,6 +29,7 @@ #include <asm/asm-compat.h> #include <asm/feature-fixups.h> #include <asm/cpuidle.h> +#include <asm/ultravisor-api.h> /* Sign-extend HDEC if not on POWER9 */ #define EXTEND_HDEC(reg) \ @@ -942,6 +943,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) ld r11, VCPU_XIVE_SAVED_STATE(r4) li r9, TM_QW1_OS lwz r8, VCPU_XIVE_CAM_WORD(r4) + cmpwi r8, 0 + beq no_xive li r7, TM_QW1_OS + TM_WORD2 mfmsr r0 andi. r0, r0, MSR_DR /* in real mode? */ @@ -1083,16 +1086,10 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r5, VCPU_LR(r4) - ld r6, VCPU_CR(r4) mtlr r5 - mtcr r6 ld r1, VCPU_GPR(R1)(r4) - ld r2, VCPU_GPR(R2)(r4) - ld r3, VCPU_GPR(R3)(r4) ld r5, VCPU_GPR(R5)(r4) - ld r6, VCPU_GPR(R6)(r4) - ld r7, VCPU_GPR(R7)(r4) ld r8, VCPU_GPR(R8)(r4) ld r9, VCPU_GPR(R9)(r4) ld r10, VCPU_GPR(R10)(r4) @@ -1110,10 +1107,42 @@ BEGIN_FTR_SECTION mtspr SPRN_HDSISR, r0 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + ld r6, VCPU_KVM(r4) + lbz r7, KVM_SECURE_GUEST(r6) + cmpdi r7, 0 + ld r6, VCPU_GPR(R6)(r4) + ld r7, VCPU_GPR(R7)(r4) + bne ret_to_ultra + + lwz r0, VCPU_CR(r4) + mtcr r0 + ld r0, VCPU_GPR(R0)(r4) + ld r2, VCPU_GPR(R2)(r4) + ld r3, VCPU_GPR(R3)(r4) ld r4, VCPU_GPR(R4)(r4) HRFI_TO_GUEST b . +/* + * Use UV_RETURN ultracall to return control back to the Ultravisor after + * processing an hypercall or interrupt that was forwarded (a.k.a. reflected) + * to the Hypervisor. + * + * All registers have already been loaded, except: + * R0 = hcall result + * R2 = SRR1, so UV can detect a synthesized interrupt (if any) + * R3 = UV_RETURN + */ +ret_to_ultra: + lwz r0, VCPU_CR(r4) + mtcr r0 + + ld r0, VCPU_GPR(R3)(r4) + mfspr r2, SPRN_SRR1 + li r3, 0 + ori r3, r3, UV_RETURN + ld r4, VCPU_GPR(R4)(r4) + sc 2 /* * Enter the guest on a P9 or later system where we have exactly @@ -2831,29 +2860,39 @@ kvm_cede_prodded: kvm_cede_exit: ld r9, HSTATE_KVM_VCPU(r13) #ifdef CONFIG_KVM_XICS - /* Abort if we still have a pending escalation */ + /* are we using XIVE with single escalation? */ + ld r10, VCPU_XIVE_ESC_VADDR(r9) + cmpdi r10, 0 + beq 3f + li r6, XIVE_ESB_SET_PQ_00 + /* + * If we still have a pending escalation, abort the cede, + * and we must set PQ to 10 rather than 00 so that we don't + * potentially end up with two entries for the escalation + * interrupt in the XIVE interrupt queue. In that case + * we also don't want to set xive_esc_on to 1 here in + * case we race with xive_esc_irq(). + */ lbz r5, VCPU_XIVE_ESC_ON(r9) cmpwi r5, 0 - beq 1f + beq 4f li r0, 0 stb r0, VCPU_CEDED(r9) -1: /* Enable XIVE escalation */ - li r5, XIVE_ESB_SET_PQ_00 + li r6, XIVE_ESB_SET_PQ_10 + b 5f +4: li r0, 1 + stb r0, VCPU_XIVE_ESC_ON(r9) + /* make sure store to xive_esc_on is seen before xive_esc_irq runs */ + sync +5: /* Enable XIVE escalation */ mfmsr r0 andi. r0, r0, MSR_DR /* in real mode? */ beq 1f - ld r10, VCPU_XIVE_ESC_VADDR(r9) - cmpdi r10, 0 - beq 3f - ldx r0, r10, r5 + ldx r0, r10, r6 b 2f 1: ld r10, VCPU_XIVE_ESC_RADDR(r9) - cmpdi r10, 0 - beq 3f - ldcix r0, r10, r5 + ldcix r0, r10, r6 2: sync - li r0, 1 - stb r0, VCPU_XIVE_ESC_ON(r9) #endif /* CONFIG_KVM_XICS */ 3: b guest_exit_cont diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c index 229496e2652e..0db937497169 100644 --- a/arch/powerpc/kvm/book3s_hv_tm.c +++ b/arch/powerpc/kvm/book3s_hv_tm.c @@ -128,7 +128,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) } /* Set CR0 to indicate previous transactional state */ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | - (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); /* L=1 => tresume, L=0 => tsuspend */ if (instr & (1 << 21)) { if (MSR_TM_SUSPENDED(msr)) @@ -172,7 +172,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) /* Set CR0 to indicate previous transactional state */ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | - (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr &= ~MSR_TS_MASK; return RESUME_GUEST; @@ -202,7 +202,7 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu) /* Set CR0 to indicate previous transactional state */ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | - (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28); + (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29); vcpu->arch.shregs.msr = msr | MSR_TS_S; return RESUME_GUEST; } diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index e8276161872e..381bf8dea193 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -827,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) * * Note: If EOI is incorrectly used by SW to lower the CPPR * value (ie more favored), we do not check for rejection of - * a pending interrupt, this is a SW error and PAPR sepcifies + * a pending interrupt, this is a SW error and PAPR specifies * that we don't have to deal with it. * * The sending of an EOI to the ICS is handled after the diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 6ca0d7376a9f..591bfb4bfd0f 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt; u64 pq; - if (!tima) + /* + * Nothing to do if the platform doesn't have a XIVE + * or this vCPU doesn't have its own XIVE context + * (e.g. because it's not using an in-kernel interrupt controller). + */ + if (!tima || !vcpu->arch.xive_cam_word) return; + eieio(); __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2); @@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data) */ vcpu->arch.xive_esc_on = false; + /* This orders xive_esc_on = false vs. subsequent stale_p = true */ + smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */ + return IRQ_HANDLED; } @@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) vcpu->arch.xive_esc_raddr = 0; } +/* + * In single escalation mode, the escalation interrupt is marked so + * that EOI doesn't re-enable it, but just sets the stale_p flag to + * indicate that the P bit has already been dealt with. However, the + * assembly code that enters the guest sets PQ to 00 without clearing + * stale_p (because it has no easy way to address it). Hence we have + * to adjust stale_p before shutting down the interrupt. + */ +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, + struct kvmppc_xive_vcpu *xc, int irq) +{ + struct irq_data *d = irq_get_irq_data(irq); + struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); + + /* + * This slightly odd sequence gives the right result + * (i.e. stale_p set if xive_esc_on is false) even if + * we race with xive_esc_irq() and xive_irq_eoi(). + */ + xd->stale_p = false; + smp_mb(); /* paired with smb_wmb in xive_esc_irq */ + if (!vcpu->arch.xive_esc_on) + xd->stale_p = true; +} + void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; @@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) /* Mask the VP IPI */ xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01); - /* Disable the VP */ - xive_native_disable_vp(xc->vp_id); - - /* Free the queues & associated interrupts */ + /* Free escalations */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { - struct xive_q *q = &xc->queues[i]; - - /* Free the escalation irq */ if (xc->esc_virq[i]) { + if (xc->xive->single_escalation) + xive_cleanup_single_escalation(vcpu, xc, + xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); } - /* Free the queue */ + } + + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); + + /* Clear the cam word so guest entry won't try to push context */ + vcpu->arch.xive_cam_word = 0; + + /* Free the queues */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { + struct xive_q *q = &xc->queues[i]; + xive_native_disable_queue(xc->vp_id, q, i); if (q->qpage) { free_pages((unsigned long)q->qpage, @@ -1986,10 +2028,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type) xive->single_escalation = xive_native_has_single_escalation(); - if (ret) { - kfree(xive); + if (ret) return ret; - } return 0; } diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h index 50494d0ee375..955b820ffd6d 100644 --- a/arch/powerpc/kvm/book3s_xive.h +++ b/arch/powerpc/kvm/book3s_xive.h @@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio); int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, bool single_escalation); struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type); +void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, + struct kvmppc_xive_vcpu *xc, int irq); #endif /* CONFIG_KVM_XICS */ #endif /* _KVM_PPC_BOOK3S_XICS_H */ diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 5596c8ec221a..248c1ea9e788 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) xc->valid = false; kvmppc_xive_disable_vcpu_interrupts(vcpu); - /* Disable the VP */ - xive_native_disable_vp(xc->vp_id); - - /* Free the queues & associated interrupts */ + /* Free escalations */ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { /* Free the escalation irq */ if (xc->esc_virq[i]) { + if (xc->xive->single_escalation) + xive_cleanup_single_escalation(vcpu, xc, + xc->esc_virq[i]); free_irq(xc->esc_virq[i], vcpu); irq_dispose_mapping(xc->esc_virq[i]); kfree(xc->esc_virq_names[i]); xc->esc_virq[i] = 0; } + } + + /* Disable the VP */ + xive_native_disable_vp(xc->vp_id); - /* Free the queue */ + /* Clear the cam word so guest entry won't try to push context */ + vcpu->arch.xive_cam_word = 0; + + /* Free the queues */ + for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { kvmppc_xive_native_cleanup_queue(vcpu, i); } @@ -1090,9 +1098,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type) xive->ops = &kvmppc_xive_native_ops; if (ret) - kfree(xive); + return ret; - return ret; + return 0; } /* @@ -1171,6 +1179,11 @@ int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val) return 0; } +bool kvmppc_xive_native_supported(void) +{ + return xive_native_has_queue_state_support(); +} + static int xive_native_debug_show(struct seq_file *m, void *private) { struct kvmppc_xive *xive = m->private; diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index b5a848a55504..00649ca5fa9a 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -440,6 +440,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm, struct kvm_vcpu *vcpu; int err; + BUILD_BUG_ON_MSG(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0, + "struct kvm_vcpu must be at offset 0 for arch usercopy region"); + vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); if (!vcpu_e500) { err = -ENOMEM; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index bb4d09c1ad56..6fca38ca791f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -271,6 +271,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) */ if (inst == KVMPPC_INST_SW_BREAKPOINT) { run->exit_reason = KVM_EXIT_DEBUG; + run->debug.arch.status = 0; run->debug.arch.address = kvmppc_get_pc(vcpu); emulated = EMULATE_EXIT_USER; advance = 0; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 9208c82ed08d..2e496eb86e94 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -89,12 +89,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) rs = get_rs(inst); rt = get_rt(inst); - /* - * if mmio_vsx_tx_sx_enabled == 0, copy data between - * VSR[0..31] and memory - * if mmio_vsx_tx_sx_enabled == 1, copy data between - * VSR[32..63] and memory - */ vcpu->arch.mmio_vsx_copy_nums = 0; vcpu->arch.mmio_vsx_offset = 0; vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6d704ad2472b..3a77bb643452 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -50,6 +50,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } +bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) +{ + return kvm_arch_vcpu_runnable(vcpu); +} + bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { return false; @@ -414,9 +419,9 @@ int kvm_arch_hardware_setup(void) return 0; } -void kvm_arch_check_processor_compat(void *rtn) +int kvm_arch_check_processor_compat(void) { - *(int *)rtn = kvmppc_core_check_processor_compat(); + return kvmppc_core_check_processor_compat(); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) @@ -452,16 +457,6 @@ err_out: return -EINVAL; } -bool kvm_arch_has_vcpu_debugfs(void) -{ - return false; -} - -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - void kvm_arch_destroy_vm(struct kvm *kvm) { unsigned int i; @@ -566,7 +561,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * a POWER9 processor) and the PowerNV platform, as * nested is not yet supported. */ - r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE); + r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) && + kvmppc_xive_native_supported(); break; #endif |