From 6b243fcfb5f1e16bcf732e6f86a63f8af5b59a9f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 11 Nov 2016 16:55:03 +1100 Subject: powerpc/64: Simplify adaptation to new ISA v3.00 HPTE format This changes the way that we support the new ISA v3.00 HPTE format. Instead of adapting everything that uses HPTE values to handle either the old format or the new format, depending on which CPU we are on, we now convert explicitly between old and new formats if necessary in the low-level routines that actually access HPTEs in memory. This limits the amount of code that needs to know about the new format and makes the conversions explicit. This is OK because the old format contains all the information that is in the new format. This also fixes operation under a hypervisor, because the H_ENTER hypercall (and other hypercalls that deal with HPTEs) will continue to require the HPTE value to be supplied in the old format. At present the kernel will not boot in HPT mode on POWER9 under a hypervisor. This fixes and partially reverts commit 50de596de8be ("powerpc/mm/hash: Add support for Power9 Hash", 2016-04-29). Fixes: 50de596de8be ("powerpc/mm/hash: Add support for Power9 Hash") Signed-off-by: Paul Mackerras Reviewed-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 47 ++++++++++++++++++++++----- arch/powerpc/mm/hash_native_64.c | 30 +++++++++++++---- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- 4 files changed, 65 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index e407af2b7333..2e6a823fa502 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -70,7 +70,9 @@ #define HPTE_V_SSIZE_SHIFT 62 #define HPTE_V_AVPN_SHIFT 7 +#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff) #define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80) +#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80) #define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) #define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL)) #define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) @@ -80,14 +82,16 @@ #define HPTE_V_VALID ASM_CONST(0x0000000000000001) /* - * ISA 3.0 have a different HPTE format. + * ISA 3.0 has a different HPTE format. */ #define HPTE_R_3_0_SSIZE_SHIFT 58 +#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT) #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) #define HPTE_R_RPN_SHIFT 12 #define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000) +#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000) #define HPTE_R_PP ASM_CONST(0x0000000000000003) #define HPTE_R_PPP ASM_CONST(0x8000000000000003) #define HPTE_R_N ASM_CONST(0x0000000000000004) @@ -316,11 +320,42 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, */ v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v <<= HPTE_V_AVPN_SHIFT; - if (!cpu_has_feature(CPU_FTR_ARCH_300)) - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } +/* + * ISA v3.0 defines a new HPTE format, which differs from the old + * format in having smaller AVPN and ARPN fields, and the B field + * in the second dword instead of the first. 
+ */ +static inline unsigned long hpte_old_to_new_v(unsigned long v) +{ + /* trim AVPN, drop B */ + return v & HPTE_V_COMMON_BITS; +} + +static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r) +{ + /* move B field from 1st to 2nd dword, trim ARPN */ + return (r & ~HPTE_R_3_0_SSIZE_MASK) | + (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT); +} + +static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r) +{ + /* insert B field */ + return (v & HPTE_V_COMMON_BITS) | + ((r & HPTE_R_3_0_SSIZE_MASK) << + (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT)); +} + +static inline unsigned long hpte_new_to_old_r(unsigned long r) +{ + /* clear out B field */ + return r & ~HPTE_R_3_0_SSIZE_MASK; +} + /* * This function sets the AVPN and L fields of the HPTE appropriately * using the base page size and actual page size. @@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, * aligned for the requested page size */ static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, - int actual_psize, int ssize) + int actual_psize) { - - if (cpu_has_feature(CPU_FTR_ARCH_300)) - pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT; - /* A 4K page needs no special encoding */ if (actual_psize == MMU_PAGE_4K) return pa & HPTE_R_RPN; diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 83ddc0e171b0..ad9fd5245be2 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, return -1; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", i, hpte_v, hpte_r); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_r = hpte_old_to_new_r(hpte_v, hpte_r); + hpte_v = hpte_old_to_new_v(hpte_v); + } + hptep->r = cpu_to_be64(hpte_r); /* Guarantee the second dword is visible before the valid bit */ eieio(); @@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, vpn, want_v & HPTE_V_AVPN, slot, newpp); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do * a tlb invalidate. 
If a hash bucket gets full, we "evict" a more/less @@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, native_lock_hpte(hptep); /* recheck with locks held */ hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))) { ret = -1; @@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) for (i = 0; i < HPTES_PER_GROUP; i++) { hptep = htab_address + slot; hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) /* HPTE matches */ @@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, want_v = hpte_encode_avpn(vpn, bpsize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* * We need to invalidate the TLB always because hpte_remove doesn't do @@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid, want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r)); /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) @@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, /* Look at the 8 bit LP value */ unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte_v = hpte_new_to_old_v(hpte_v, hpte_r); + hpte_r = hpte_new_to_old_r(hpte_r); + } if (!(hpte_v & HPTE_V_LARGE)) { size = MMU_PAGE_4K; a_size = MMU_PAGE_4K; @@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, a_size = hpte_page_sizes[lp] >> 4; } /* This works for all page sizes, and for 256M and 1T segments */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) - *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; - else - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; - + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); @@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local) want_v = hpte_encode_avpn(vpn, psize, ssize); native_lock_hpte(hptep); hpte_v = be64_to_cpu(hptep->v); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + hpte_v = hpte_new_to_old_v(hpte_v, + be64_to_cpu(hptep->r)); if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) native_unlock_hpte(hptep); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index cb3c50328de8..cc2b281a3766 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, vflags &= ~HPTE_V_SECONDARY; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index aa35245d8d6d..f2c98f6c1c9c 100644 --- 
a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, hpte_group, vpn, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; if (!(vflags & HPTE_V_BOLTED)) pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); -- cgit v1.2.3-59-g8ed1b From 025c95113866415c17b47b2a80ad6341214b1fe9 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 10 Oct 2016 11:31:19 +1100 Subject: KVM: PPC: Book3S HV: Fix sparse static warning Squash a couple of sparse warnings by making things static. Build tested. Signed-off-by: Daniel Axtens Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 05f09ae82587..95abca69b168 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -255,7 +255,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, msr); } -long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, +static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret) { @@ -1608,7 +1608,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf, return ret; } -ssize_t debugfs_htab_write(struct file *file, const char __user *buf, +static ssize_t debugfs_htab_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos) { return -EACCES; -- cgit v1.2.3-59-g8ed1b From ebe4535fbe7a190e13c0e175e7e7a02898dbac33 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 10 Oct 2016 11:31:20 +1100 Subject: KVM: PPC: Book3S HV: sparse: prototypes for functions called from assembler A bunch of KVM functions are only called from assembler. Give them prototypes in asm-prototypes.h This reduces sparse warnings. 
Signed-off-by: Daniel Axtens Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/asm-prototypes.h | 44 +++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_64_vio_hv.c | 1 + arch/powerpc/kvm/book3s_hv_builtin.c | 1 + arch/powerpc/kvm/book3s_hv_ras.c | 1 + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 1 + arch/powerpc/kvm/book3s_hv_rm_xics.c | 1 + 6 files changed, 49 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d1492736d852..6c853bcd11fa 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -14,6 +14,9 @@ #include #include +#ifdef CONFIG_KVM +#include +#endif #include @@ -109,4 +112,45 @@ void early_setup_secondary(void); /* time */ void accumulate_stolen_time(void); +/* kvm */ +#ifdef CONFIG_KVM +long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce); +long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_list, unsigned long npages); +long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_value, unsigned long npages); +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count); +long kvmppc_h_random(struct kvm_vcpu *vcpu); +void kvmhv_commence_exit(int trap); +long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); +void kvmppc_subcore_enter_guest(void); +void kvmppc_subcore_exit_guest(void); +long kvmppc_realmode_hmi_handler(void); +long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); +long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn); +long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); +long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va); +long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, + unsigned long slb_v, unsigned int status, bool data); +unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); +int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr); +int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); +int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); +#endif + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index d461c440889a..30f83cf1b98e 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -40,6 +40,7 @@ #include #include #include +#include #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 0c84d6bc8356..90a0b274e699 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -26,6 +26,7 @@ #include #include #include +#include #define KVM_CMA_CHUNK_ORDER 18 diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 0fa70a9618d7..be1cee5dc032 100644 --- 
a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -16,6 +16,7 @@ #include #include #include +#include /* SRR1 bits for machine check on POWER7 */ #define SRR1_MC_LDSTERR (1ul << (63-42)) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 99b4e9d5dd23..6b3d01b024d7 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Translate address of a vmalloc'd thing to a linear map address */ static void *real_vmalloc_addr(void *x) diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index a0ea63ac2b52..f2d4487b137e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "book3s_xics.h" -- cgit v1.2.3-59-g8ed1b From 28d057c8970d394fe048f0b2b9f203889110f165 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:15:50 +0000 Subject: KVM: PPC: Book3S HV: Use list_move_tail instead of list_del/list_add_tail Use list_move_tail() instead of list_del() + list_add_tail(). Signed-off-by: Wei Yongjun Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3686471be32b..415943198e0f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2123,8 +2123,7 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) cip->subcore_threads[sub] = vc->num_threads; cip->subcore_vm[sub] = vc->kvm; init_master_vcore(vc); - list_del(&vc->preempt_list); - list_add_tail(&vc->preempt_list, &cip->vcs[sub]); + list_move_tail(&vc->preempt_list, &cip->vcs[sub]); return true; } -- cgit v1.2.3-59-g8ed1b From f05859827d28bde311a92e0bb5c1b6a92c305442 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Fri, 4 Nov 2016 13:55:11 +0800 Subject: KVM: PPC: Book3S HV: Clear the key field of HPTE when the page is paged out Currently we mark an HPTE for emulated MMIO with the HPTE_V_ABSENT bit set as well as key 0x1f. However, such HPTEs may conflict with the HPTE for a real guest RAM page that also has key 0x1f when that page gets paged out. This patch clears the key field of the HPTE when the page is paged out, then restores it when the HPTE is re-established. 
Signed-off-by: Yongji Xie Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 +++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 95abca69b168..33a7d1f9167b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -575,7 +575,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (psize < PAGE_SIZE) psize = PAGE_SIZE; - r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1)); + r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | + ((pfn << PAGE_SHIFT) & ~(psize - 1)); if (hpte_is_writable(r) && !write_ok) r = hpte_make_readonly(r); ret = RESUME_GUEST; @@ -758,6 +759,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, hpte_rpn(ptel, psize) == gfn) { hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); + hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO); /* Harvest R and C */ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6b3d01b024d7..e960c831fd15 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -265,8 +265,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (pa) pteh |= HPTE_V_VALID; - else + else { pteh |= HPTE_V_ABSENT; + ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); + } /*If we had host pte mapping then Check WIMG */ if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { @@ -352,6 +354,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; + ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO); unlock_rmap(rmap); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, -- cgit v1.2.3-59-g8ed1b From a56ee9f8f01c5a11ced541f00c67646336f402b6 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Fri, 4 Nov 2016 13:55:12 +0800 Subject: KVM: PPC: Book3S HV: Add a per vcpu cache for recently page faulted MMIO entries This keeps a per vcpu cache for recently page faulted MMIO entries. On a page fault, if the entry exists in the cache, we can avoid some time-consuming paths, for example, looking up the HPT, locking the HPTE twice and searching for the mmio gfn in the memslots, and instead directly call kvmppc_hv_emulate_mmio(). In the current implementation, we limit the size of the cache to four entries. We think this is enough to cover the high-frequency MMIO HPTEs in most cases. For example, consider the case of using virtio devices: for virtio legacy devices, one HPTE can handle notifications from up to 1024 (64K page / 64-byte port I/O register) devices, so one cache entry is enough; for virtio modern devices, we always need one HPTE to handle notifications for each device, because a modern device uses an 8M MMIO register to notify the host instead of a port I/O register. Since a typical system configuration should not exceed four virtio devices per vcpu, four cache entries are also enough in this case. Of course, if needed, we could also turn the macro into a module parameter in the future. 
Signed-off-by: Yongji Xie Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 21 +++++++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 16 +++++ arch/powerpc/kvm/book3s_hv.c | 9 +++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 117 ++++++++++++++++++++++++++++++------ 4 files changed, 144 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28350a294b1e..20ef27d09d05 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -246,6 +246,7 @@ struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE unsigned long hpt_virt; struct revmap_entry *revmap; + atomic64_t mmio_update; unsigned int host_lpid; unsigned long host_lpcr; unsigned long sdr1; @@ -408,6 +409,24 @@ struct kvmppc_passthru_irqmap { #define KVMPPC_IRQ_MPIC 1 #define KVMPPC_IRQ_XICS 2 +#define MMIO_HPTE_CACHE_SIZE 4 + +struct mmio_hpte_cache_entry { + unsigned long hpte_v; + unsigned long hpte_r; + unsigned long rpte; + unsigned long pte_index; + unsigned long eaddr; + unsigned long slb_v; + long mmio_update; + unsigned int slb_base_pshift; +}; + +struct mmio_hpte_cache { + struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE]; + unsigned int index; +}; + struct openpic; struct kvm_vcpu_arch { @@ -655,9 +674,11 @@ struct kvm_vcpu_arch { #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE struct kvm_vcpu_arch_shared shregs; + struct mmio_hpte_cache mmio_cache; unsigned long pgfault_addr; long pgfault_index; unsigned long pgfault_hpte[2]; + struct mmio_hpte_cache_entry *pgfault_cache; struct task_struct *run_task; struct kvm_run *kvm_run; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 33a7d1f9167b..564ae8aa1961 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) /* 128 (2**7) bytes in each HPTEG */ kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; + atomic64_set(&kvm->arch.mmio_update, 0); + /* Allocate reverse map array */ rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); if (!rev) { @@ -451,6 +453,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int writing, write_ok; struct vm_area_struct *vma; unsigned long rcbits; + long mmio_update; /* * Real-mode code has already searched the HPT and found the @@ -460,6 +463,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; + + if (vcpu->arch.pgfault_cache) { + mmio_update = atomic64_read(&kvm->arch.mmio_update); + if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) { + r = vcpu->arch.pgfault_cache->rpte; + psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r); + gpa_base = r & HPTE_R_RPN & ~(psize - 1); + gfn_base = gpa_base >> PAGE_SHIFT; + gpa = gpa_base | (ea & (psize - 1)); + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, + dsisr & DSISR_ISSTORE); + } + } index = vcpu->arch.pgfault_index; hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 415943198e0f..320a7981c906 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2970,6 +2970,15 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + /* + * If we are making a new memslot, it might make + * 
some address that was previously cached as emulated + * MMIO be no longer emulated MMIO, so invalidate + * all the caches of emulated MMIO translations. + */ + if (npages) + atomic64_inc(&kvm->arch.mmio_update); + if (npages && old->npages) { /* * If modifying a memslot, reset all the rmap dirty bits. diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index e960c831fd15..65902e94f4bd 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -390,6 +390,13 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, #define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index)) #endif +static inline int is_mmio_hpte(unsigned long v, unsigned long r) +{ + return ((v & HPTE_V_ABSENT) && + (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)); +} + static inline int try_lock_tlbie(unsigned int *lock) { unsigned int tmp, old; @@ -456,6 +463,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); v = pte & ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[1]); if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); @@ -476,6 +484,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); + if (is_mmio_hpte(v, pte)) + atomic64_inc(&kvm->arch.mmio_update); + if (v & HPTE_V_ABSENT) v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; hpret[0] = v; @@ -502,7 +513,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; - u64 hp0; + u64 hp0, hp1; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -535,6 +546,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) } found = 0; hp0 = be64_to_cpu(hp[0]); + hp1 = be64_to_cpu(hp[1]); if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ @@ -565,6 +577,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; + if (is_mmio_hpte(hp0, hp1)) + atomic64_inc(&kvm->arch.mmio_update); continue; } @@ -625,6 +639,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, } v = pte; + pte = be64_to_cpu(hpte[1]); bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -646,7 +661,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, * readonly to writable. If it should be writable, we'll * take a trap and let the page fault code sort it out. 
*/ - pte = be64_to_cpu(hpte[1]); r = (pte & ~mask) | bits; if (hpte_is_writable(r) && !hpte_is_writable(pte)) r = hpte_make_readonly(r); @@ -662,6 +676,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, } unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); + if (is_mmio_hpte(v, pte)) + atomic64_inc(&kvm->arch.mmio_update); + return H_SUCCESS; } @@ -832,6 +849,37 @@ static int slb_base_page_shift[4] = { 20, /* 1M, unsupported */ }; +static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu, + unsigned long eaddr, unsigned long slb_v, long mmio_update) +{ + struct mmio_hpte_cache_entry *entry = NULL; + unsigned int pshift; + unsigned int i; + + for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) { + entry = &vcpu->arch.mmio_cache.entry[i]; + if (entry->mmio_update == mmio_update) { + pshift = entry->slb_base_pshift; + if ((entry->eaddr >> pshift) == (eaddr >> pshift) && + entry->slb_v == slb_v) + return entry; + } + } + return NULL; +} + +static struct mmio_hpte_cache_entry * + next_mmio_cache_entry(struct kvm_vcpu *vcpu) +{ + unsigned int index = vcpu->arch.mmio_cache.index; + + vcpu->arch.mmio_cache.index++; + if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE) + vcpu->arch.mmio_cache.index = 0; + + return &vcpu->arch.mmio_cache.entry[index]; +} + /* When called from virtmode, this func should be protected by * preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK * can trigger deadlock issue. @@ -933,25 +981,36 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; + struct mmio_hpte_cache_entry *cache_entry = NULL; + long mmio_update = 0; /* For protection fault, expect to find a valid HPTE */ valid = HPTE_V_VALID; - if (status & DSISR_NOHPTE) + if (status & DSISR_NOHPTE) { valid |= HPTE_V_ABSENT; - - index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); - if (index < 0) { - if (status & DSISR_NOHPTE) - return status; /* there really was no HPTE */ - return 0; /* for prot fault, HPTE disappeared */ + mmio_update = atomic64_read(&kvm->arch.mmio_update); + cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update); } - hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; - r = be64_to_cpu(hpte[1]); - rev = real_vmalloc_addr(&kvm->arch.revmap[index]); - gr = rev->guest_rpte; + if (cache_entry) { + index = cache_entry->pte_index; + v = cache_entry->hpte_v; + r = cache_entry->hpte_r; + gr = cache_entry->rpte; + } else { + index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid); + if (index < 0) { + if (status & DSISR_NOHPTE) + return status; /* there really was no HPTE */ + return 0; /* for prot fault, HPTE disappeared */ + } + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); + rev = real_vmalloc_addr(&kvm->arch.revmap[index]); + gr = rev->guest_rpte; - unlock_hpte(hpte, v); + unlock_hpte(hpte, v); + } /* For not found, if the HPTE is valid by now, retry the instruction */ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID)) @@ -989,12 +1048,32 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, vcpu->arch.pgfault_index = index; vcpu->arch.pgfault_hpte[0] = v; vcpu->arch.pgfault_hpte[1] = r; + vcpu->arch.pgfault_cache = cache_entry; /* Check the storage key to see if it is possibly emulated MMIO */ - if (data && (vcpu->arch.shregs.msr & MSR_IR) && - (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == 
- (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) - return -2; /* MMIO emulation - load instr word */ + if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) == + (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) { + if (!cache_entry) { + unsigned int pshift = 12; + unsigned int pshift_index; + + if (slb_v & SLB_VSID_L) { + pshift_index = ((slb_v & SLB_VSID_LP) >> 4); + pshift = slb_base_page_shift[pshift_index]; + } + cache_entry = next_mmio_cache_entry(vcpu); + cache_entry->eaddr = addr; + cache_entry->slb_base_pshift = pshift; + cache_entry->pte_index = index; + cache_entry->hpte_v = v; + cache_entry->hpte_r = r; + cache_entry->rpte = gr; + cache_entry->slb_v = slb_v; + cache_entry->mmio_update = mmio_update; + } + if (data && (vcpu->arch.shregs.msr & MSR_IR)) + return -2; /* MMIO emulation - load instr word */ + } return -1; /* send fault up to host kernel mode */ } -- cgit v1.2.3-59-g8ed1b From 0d808df06a44200f52262b6eb72bcb6042f5a7c5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 7 Nov 2016 15:09:58 +1100 Subject: KVM: PPC: Book3S HV: Save/restore XER in checkpointed register state When switching from/to a guest that has a transaction in progress, we need to save/restore the checkpointed register state. Although XER is part of the CPU state that gets checkpointed, the code that does this saving and restoring doesn't save/restore XER. This fixes it by saving and restoring the XER. To allow userspace to read/write the checkpointed XER value, we also add a new ONE_REG specifier. The visible effect of this bug is that the guest may see its XER value being corrupted when it uses transactions. Fixes: e4e38121507a ("KVM: PPC: Book3S HV: Add transactional memory support") Fixes: 0a8eccefcb34 ("KVM: PPC: Book3S HV: Add missing code for transaction reclaim on guest exit") Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Paul Mackerras Reviewed-by: Thomas Huth Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/uapi/asm/kvm.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv.c | 6 ++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++++ 6 files changed, 14 insertions(+) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 739db9ab16b2..a7596e9fdf06 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2039,6 +2039,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TM_VSCR | 32 PPC | KVM_REG_PPC_TM_DSCR | 64 PPC | KVM_REG_PPC_TM_TAR | 64 + PPC | KVM_REG_PPC_TM_XER | 64 | | MIPS | KVM_REG_MIPS_R0 | 64 ... 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 20ef27d09d05..0e584ee57730 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -565,6 +565,7 @@ struct kvm_vcpu_arch { u64 tfiar; u32 cr_tm; + u64 xer_tm; u64 lr_tm; u64 ctr_tm; u64 amr_tm; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c93cf35ce379..0fb1326c3ea2 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -596,6 +596,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67) #define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68) #define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69) +#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a) /* PPC64 eXternal Interrupt Controller Specification */ #define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index caec7bf3b99a..c833d88c423d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -569,6 +569,7 @@ int main(void) DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr)); DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm)); DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm)); + DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm)); DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm)); DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm)); DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 320a7981c906..02f9aedbc258 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1288,6 +1288,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: *val = get_reg_val(id, vcpu->arch.cr_tm); break; + case KVM_REG_PPC_TM_XER: + *val = get_reg_val(id, vcpu->arch.xer_tm); + break; case KVM_REG_PPC_TM_LR: *val = get_reg_val(id, vcpu->arch.lr_tm); break; @@ -1498,6 +1501,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_TM_CR: vcpu->arch.cr_tm = set_reg_val(id, *val); break; + case KVM_REG_PPC_TM_XER: + vcpu->arch.xer_tm = set_reg_val(id, *val); + break; case KVM_REG_PPC_TM_LR: vcpu->arch.lr_tm = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c3c1d1bcfc67..6f81adb112f1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2600,11 +2600,13 @@ kvmppc_save_tm: mfctr r7 mfspr r8, SPRN_AMR mfspr r10, SPRN_TAR + mfxer r11 std r5, VCPU_LR_TM(r9) stw r6, VCPU_CR_TM(r9) std r7, VCPU_CTR_TM(r9) std r8, VCPU_AMR_TM(r9) std r10, VCPU_TAR_TM(r9) + std r11, VCPU_XER_TM(r9) /* Restore r12 as trap number. 
*/ lwz r12, VCPU_TRAP(r9) @@ -2697,11 +2699,13 @@ kvmppc_restore_tm: ld r7, VCPU_CTR_TM(r4) ld r8, VCPU_AMR_TM(r4) ld r9, VCPU_TAR_TM(r4) + ld r10, VCPU_XER_TM(r4) mtlr r5 mtcr r6 mtctr r7 mtspr SPRN_AMR, r8 mtspr SPRN_TAR, r9 + mtxer r10 /* * Load up PPR and DSCR values but don't put them in the actual SPRs -- cgit v1.2.3-59-g8ed1b From f064a0de1579fabded8990bed93971e30deb9ecb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Nov 2016 16:43:28 +1100 Subject: KVM: PPC: Book3S HV: Don't lose hardware R/C bit updates in H_PROTECT The hashed page table MMU in POWER processors can update the R (reference) and C (change) bits in a HPTE at any time until the HPTE has been invalidated and the TLB invalidation sequence has completed. In kvmppc_h_protect, which implements the H_PROTECT hypercall, we read the HPTE, modify the second doubleword, invalidate the HPTE in memory, do the TLB invalidation sequence, and then write the modified value of the second doubleword back to memory. In doing so we could overwrite an R/C bit update done by hardware between when we read the HPTE and when the TLB invalidation completed. To fix this we re-read the second doubleword after the TLB invalidation and OR in the (possibly) new values of R and C. We can use an OR since hardware only ever sets R and C, never clears them. This race was found by code inspection. In principle this bug could cause occasional guest memory corruption under host memory pressure. Fixes: a8606e20e41a ("KVM: PPC: Handle some PAPR hcalls in the kernel", 2011-06-29) Cc: stable@vger.kernel.org # v3.19+ Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 65902e94f4bd..0e7fca413a5d 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -671,6 +671,8 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, HPTE_V_ABSENT); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); + /* Don't lose R/C bit updates done by hardware */ + r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C); hpte[1] = cpu_to_be64(r); } } -- cgit v1.2.3-59-g8ed1b From 68b8b72bb221f6d3d14b7fcc9c6991121b6a06ba Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Nov 2016 22:21:14 +0800 Subject: KVM: PPC: Book3S HV: Drop duplicate header asm/iommu.h Drop duplicate header asm/iommu.h from book3s_64_vio_hv.c. Signed-off-by: Geliang Tang Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_vio_hv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 30f83cf1b98e..66e98276d93e 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) -- cgit v1.2.3-59-g8ed1b From 7fd317f8c330a8d3ed6468d5670e5c09c25846e2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 21 Nov 2016 16:00:23 +1100 Subject: powerpc/64: Add some more SPRs and SPR bits for POWER9 These definitions will be needed by KVM. 
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 9cd4e8cbc78c..df81411a85ea 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -153,6 +153,8 @@ #define PSSCR_EC 0x00100000 /* Exit Criterion */ #define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_SD 0x00400000 /* Status Disable */ +#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ +#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */ /* Floating Point Status and Control Register (FPSCR) Fields */ #define FPSCR_FX 0x80000000 /* FPU exception summary */ @@ -236,6 +238,7 @@ #define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ #define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ +#define SPRN_TIDR 144 /* Thread ID register */ #define SPRN_CTRLF 0x088 #define SPRN_CTRLT 0x098 #define CTRL_CT 0xc0000000 /* current thread */ @@ -294,6 +297,7 @@ #define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */ #define SPRN_LMRR 0x32D /* Load Monitor Region Register */ #define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */ +#define SPRN_ASDR 0x330 /* Access segment descriptor register */ #define SPRN_IC 0x350 /* Virtual Instruction Count */ #define SPRN_VTB 0x351 /* Virtual Time Base */ #define SPRN_LDBAR 0x352 /* LD Base Address Register */ @@ -357,6 +361,7 @@ #define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */ #define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ #define LPCR_MER_SH 11 +#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */ #define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */ #define LPCR_LPES 0x0000000c #define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */ -- cgit v1.2.3-59-g8ed1b From 9d66195807ac6cb8a14231fd055ff755977c5fca Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 21 Nov 2016 16:00:58 +1100 Subject: powerpc/64: Provide functions for accessing POWER9 partition table POWER9 requires the host to set up a partition table, which is a table in memory indexed by logical partition ID (LPID) which contains the pointers to page tables and process tables for the host and each guest. This factors out the initialization of the partition table into a single function. This code was previously duplicated between hash_utils_64.c and pgtable-radix.c. This provides a function for setting a partition table entry, which is used in early MMU initialization, and will be used by KVM whenever a guest is created. This function includes a tlbie instruction which will flush all TLB entries for the LPID and all caches of the partition table entry for the LPID, across the system. This also moves a call to memblock_set_current_limit(), which was in radix_init_partition_table(), but has nothing to do with the partition table. By analogy with the similar code for hash, the call gets moved to near the end of radix__early_init_mmu(). It now gets called when running as a guest, whereas previously it would only be called if the kernel is running as the host. 
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 5 +++++ arch/powerpc/mm/hash_utils_64.c | 28 ++++------------------------ arch/powerpc/mm/pgtable-radix.c | 18 ++++++------------ arch/powerpc/mm/pgtable_64.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 36 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index e88368354e49..060b40b1bc3d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -208,6 +208,11 @@ extern u64 ppc64_rma_size; /* Cleanup function used by kexec */ extern void mmu_cleanup_all(void); extern void radix__mmu_cleanup_all(void); + +/* Functions for creating and updating partition table on POWER9 */ +extern void mmu_partition_table_init(void); +extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1); #endif /* CONFIG_PPC64 */ struct mm_struct; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 44d3c3a38e3e..b9a062f5805b 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -792,37 +792,17 @@ static void update_hid_for_hash(void) static void __init hash_init_partition_table(phys_addr_t hash_table, unsigned long htab_size) { - unsigned long ps_field; - unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + mmu_partition_table_init(); /* - * slb llp encoding for the page size used in VPM real mode. - * We can ignore that for lpid 0 + * PS field (VRMA page size) is not used for LPID 0, hence set to 0. + * For now, UPRT is 0 and we have no segment table. */ - ps_field = 0; htab_size = __ilog2(htab_size) - 18; - - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); - partition_tb = __va(memblock_alloc_base(patb_size, patb_size, - MEMBLOCK_ALLOC_ANYWHERE)); - - /* Initialize the Partition Table with no entries */ - memset((void *)partition_tb, 0, patb_size); - partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size); - /* - * FIXME!! This should be done via update_partition table - * For now UPRT is 0 for us. - */ - partition_tb->patb1 = 0; + mmu_partition_table_set_entry(0, hash_table | htab_size, 0); pr_info("Partition table %p\n", partition_tb); if (cpu_has_feature(CPU_FTR_POWER9_DD1)) update_hid_for_hash(); - /* - * update partition table control register, - * 64 K size. - */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); - } static void __init htab_initialize(void) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index ed7bddc456b7..186f1adb04ec 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -177,23 +177,15 @@ redo: static void __init radix_init_partition_table(void) { - unsigned long rts_field; + unsigned long rts_field, dw0; + mmu_partition_table_init(); rts_field = radix__get_tree_size(); + dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR; + mmu_partition_table_set_entry(0, dw0, 0); - BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); - partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); - partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | - RADIX_PGD_INDEX_SIZE | PATB_HR); pr_info("Initializing Radix MMU\n"); pr_info("Partition table %p\n", partition_tb); - - memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); - /* - * update partition table control register, - * 64 K size. 
- */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); } void __init radix_init_native(void) @@ -378,6 +370,8 @@ void __init radix__early_init_mmu(void) radix_init_partition_table(); } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); + radix_init_pgtable(); } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f5e8d4edb808..8bca7f58afc4 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) } } #endif + +#ifdef CONFIG_PPC_BOOK3S_64 +void __init mmu_partition_table_init(void) +{ + unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + + BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); + partition_tb = __va(memblock_alloc_base(patb_size, patb_size, + MEMBLOCK_ALLOC_ANYWHERE)); + + /* Initialize the Partition Table with no entries */ + memset((void *)partition_tb, 0, patb_size); + + /* + * update partition table control register, + * 64 K size. + */ + mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); +} + +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, + unsigned long dw1) +{ + partition_tb[lpid].patb0 = cpu_to_be64(dw0); + partition_tb[lpid].patb1 = cpu_to_be64(dw1); + + /* Global flush of TLBs and partition table caches for this lpid */ + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} +EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); +#endif /* CONFIG_PPC_BOOK3S_64 */ -- cgit v1.2.3-59-g8ed1b From ffe6d810fe95208b9f132fb7687930185129305a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 21 Nov 2016 16:01:36 +1100 Subject: powerpc/powernv: Define real-mode versions of OPAL XICS accessors This defines real-mode versions of opal_int_get_xirr(), opal_int_eoi() and opal_int_set_mfrr(), for use by KVM real-mode code. It also exports opal_int_set_mfrr() so that the modular part of KVM can use it to send IPIs. 
Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 3 +++ arch/powerpc/platforms/powernv/opal-wrappers.S | 3 +++ arch/powerpc/platforms/powernv/opal.c | 2 ++ 3 files changed, 8 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index e958b7096f19..5c7db0f1a708 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id, int64_t opal_pci_poll2(uint64_t id, uint64_t data); int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll); +int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll); int64_t opal_int_set_cppr(uint8_t cppr); int64_t opal_int_eoi(uint32_t xirr); +int64_t opal_rm_int_eoi(uint32_t xirr); int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr); +int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr); int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, uint32_t pe_num, uint32_t tce_size, uint64_t dma_addr, uint32_t npages); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 44d2d842cee7..3aa40f1b20f5 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE); OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE); OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE); OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR); +OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR); OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR); OPAL_CALL(opal_int_eoi, OPAL_INT_EOI); +OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI); OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); +OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 6c9a65b52e63..b3b8930ac52f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -896,3 +896,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind); EXPORT_SYMBOL_GPL(opal_leds_set_ind); /* Export this symbol for PowerNV Operator Panel class driver */ EXPORT_SYMBOL_GPL(opal_write_oppanel_async); +/* Export this for KVM */ +EXPORT_SYMBOL_GPL(opal_int_set_mfrr); -- cgit v1.2.3-59-g8ed1b From 9dd17e8517f5ccd594a01374b0b41ec1a1c266af Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Mon, 21 Nov 2016 16:02:35 +1100 Subject: powerpc/64: Define new ISA v3.00 logical PVR value and PCR register value ISA 3.00 adds the logical PVR value 0x0f000005, so add a definition for this. Define PCR_ARCH_207 to reflect ISA 2.07 compatibility mode in the processor compatibility register (PCR). [paulus@ozlabs.org - moved dummy PCR_ARCH_300 value into next patch] Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index df81411a85ea..30d9adbc32ba 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -382,6 +382,12 @@ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. 
disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ #define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */ +/* + * These bits are used in the function kvmppc_set_arch_compat() to specify and + * determine both the compatibility level which we want to emulate and the + * compatibility level which the host is capable of emulating. + */ +#define PCR_ARCH_207 0x8 /* Architecture 2.07 */ #define PCR_ARCH_206 0x4 /* Architecture 2.06 */ #define PCR_ARCH_205 0x2 /* Architecture 2.05 */ #define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */ @@ -1223,6 +1229,7 @@ #define PVR_ARCH_206 0x0f000003 #define PVR_ARCH_206p 0x0f100003 #define PVR_ARCH_207 0x0f000004 +#define PVR_ARCH_300 0x0f000005 /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ -- cgit v1.2.3-59-g8ed1b From 1f0f2e72270c089c291aac794800cc326c4c05dd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 22 Nov 2016 14:50:39 +1100 Subject: powerpc/reg: Add definition for LPCR_PECE_HVEE ISA 3.0 defines a new PECE (Power-saving mode Exit Cause Enable) field in the LPCR (Logical Partitioning Control Register), called LPCR_PECE_HVEE (Hypervisor Virtualization Exit Enable). KVM code will need to know about this bit, so add a definition for it. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 30d9adbc32ba..c94df450bdd6 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -359,6 +359,7 @@ #define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */ #define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */ #define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */ +#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */ #define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */ #define LPCR_MER_SH 11 #define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */ -- cgit v1.2.3-59-g8ed1b From 02ed21aeda0e02d84af493f92b1b6b6b13ddd6e8 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 23 Nov 2016 10:44:09 +1100 Subject: powerpc/powernv: Define and set POWER9 HFSCR doorbell bit Define and set the POWER9 HFSCR doorbell bit so that guests can use msgsndp. ISA 3.0 calls this MSGP, so name it accordingly in the code. 
Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 2 ++ arch/powerpc/kernel/cpu_setup_power.S | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c94df450bdd6..04aa1ee8cdb6 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -309,6 +309,7 @@ /* HFSCR and FSCR bit numbers are the same */ #define FSCR_LM_LG 11 /* Enable Load Monitor Registers */ +#define FSCR_MSGP_LG 10 /* Enable MSGP */ #define FSCR_TAR_LG 8 /* Enable Target Address Register */ #define FSCR_EBB_LG 7 /* Enable Event Based Branching */ #define FSCR_TM_LG 5 /* Enable Transactional Memory */ @@ -324,6 +325,7 @@ #define FSCR_DSCR __MASK(FSCR_DSCR_LG) #define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */ #define HFSCR_LM __MASK(FSCR_LM_LG) +#define HFSCR_MSGP __MASK(FSCR_MSGP_LG) #define HFSCR_TAR __MASK(FSCR_TAR_LG) #define HFSCR_EBB __MASK(FSCR_EBB_LG) #define HFSCR_TM __MASK(FSCR_TM_LG) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 52ff3f025437..bdfc1c67eb38 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -174,7 +174,7 @@ __init_FSCR: __init_HFSCR: mfspr r3,SPRN_HFSCR ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB + HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP mtspr SPRN_HFSCR,r3 blr -- cgit v1.2.3-59-g8ed1b From abb7c7ddbacd30b9a879491998966771504760bd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Nov 2016 16:57:24 +1100 Subject: KVM: PPC: Book3S HV: Adapt to new HPTE format on POWER9 This adapts the KVM-HV hashed page table (HPT) code to read and write HPT entries in the new format defined in Power ISA v3.00 on POWER9 machines. The new format moves the B (segment size) field from the first doubleword to the second, and trims some bits from the AVA (abbreviated virtual address) and ARPN (abbreviated real page number) fields. As far as possible, the conversion is done when reading or writing the HPT entries, and the rest of the code continues to use the old format. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 39 ++++++++++---- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 101 +++++++++++++++++++++++++----------- 2 files changed, 100 insertions(+), 40 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 564ae8aa1961..b795dd1ac2ef 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -314,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_slb *slbe; unsigned long slb_v; unsigned long pp, key; - unsigned long v, gr; + unsigned long v, orig_v, gr; __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? 
MSR_DR : MSR_IR); @@ -339,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return -ENOENT; } hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1])); gr = kvm->arch.revmap[index].guest_rpte; - unlock_hpte(hptep, v); + unlock_hpte(hptep, orig_v); preempt_enable(); gpte->eaddr = eaddr; @@ -440,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, { struct kvm *kvm = vcpu->kvm; unsigned long hpte[3], r; + unsigned long hnow_v, hnow_r; __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; @@ -488,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unlock_hpte(hptep, hpte[0]); preempt_enable(); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]); + hpte[1] = hpte_new_to_old_r(hpte[1]); + } if (hpte[0] != vcpu->arch.pgfault_hpte[0] || hpte[1] != vcpu->arch.pgfault_hpte[1]) return RESUME_GUEST; @@ -599,9 +606,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || - be64_to_cpu(hptep[1]) != hpte[1] || - rev->guest_rpte != hpte[2]) + hnow_v = be64_to_cpu(hptep[0]); + hnow_r = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hnow_v = hpte_new_to_old_v(hnow_v, hnow_r); + hnow_r = hpte_new_to_old_r(hnow_r); + } + if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -632,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + r = hpte_old_to_new_r(hpte[0], r); + hpte[0] = hpte_old_to_new_v(hpte[0]); + } hptep[1] = cpu_to_be64(r); eieio(); __unlock_hpte(hptep, hpte[0]); @@ -1183,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { - unsigned long v, r; + unsigned long v, r, hr; unsigned long rcbits_unset; int ok = 1; int valid, dirty; @@ -1210,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp, while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); v = be64_to_cpu(hptp[0]); + hr = be64_to_cpu(hptp[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, hr); + hr = hpte_new_to_old_r(hr); + } /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1217,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { - revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + if (valid && (rcbits_unset & hr)) { + revp->guest_rpte |= (hr & (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 0e7fca413a5d..59d345883d9a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -365,6 +365,11 
@@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } + /* Convert to new format on P9 */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + ptel = hpte_old_to_new_r(pteh, ptel); + pteh = hpte_old_to_new_v(pteh); + } hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ @@ -446,27 +451,31 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; - u64 pte; + u64 pte, orig_pte, pte_r; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(hpte[0]); + pte = orig_pte = be64_to_cpu(hpte[0]); + pte_r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + pte = hpte_new_to_old_v(pte, pte_r); + pte_r = hpte_new_to_old_r(pte_r); + } if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || ((flags & H_ANDCOND) && (pte & avpn) != 0)) { - __unlock_hpte(hpte, pte); + __unlock_hpte(hpte, orig_pte); return H_NOT_FOUND; } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); v = pte & ~HPTE_V_HVLOCK; - pte = be64_to_cpu(hpte[1]); if (v & HPTE_V_VALID) { hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index); + rb = compute_tlbie_rb(v, pte_r, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * The reference (R) and change (C) bits in a HPT @@ -484,7 +493,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); - if (is_mmio_hpte(v, pte)) + if (is_mmio_hpte(v, pte_r)) atomic64_inc(&kvm->arch.mmio_update); if (v & HPTE_V_ABSENT) @@ -547,6 +556,10 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) found = 0; hp0 = be64_to_cpu(hp[0]); hp1 = be64_to_cpu(hp[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ @@ -584,8 +597,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) /* leave it locked */ hp[0] &= ~cpu_to_be64(HPTE_V_VALID); - tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), - be64_to_cpu(hp[1]), pte_index); + tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -623,7 +635,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; - u64 pte; + u64 pte_v, pte_r; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; @@ -631,15 +643,16 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - pte = be64_to_cpu(hpte[0]); - if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { - __unlock_hpte(hpte, pte); + v = pte_v = be64_to_cpu(hpte[0]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1])); + if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) { + __unlock_hpte(hpte, pte_v); return H_NOT_FOUND; } - v = pte; - pte = be64_to_cpu(hpte[1]); + pte_r = be64_to_cpu(hpte[1]); bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -661,13 +674,13 
@@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, * readonly to writable. If it should be writable, we'll * take a trap and let the page fault code sort it out. */ - r = (pte & ~mask) | bits; - if (hpte_is_writable(r) && !hpte_is_writable(pte)) + r = (pte_r & ~mask) | bits; + if (hpte_is_writable(r) && !hpte_is_writable(pte_r)) r = hpte_make_readonly(r); /* If the PTE is changing, invalidate it first */ - if (r != pte) { + if (r != pte_r) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) | + hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) | HPTE_V_ABSENT); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); @@ -676,9 +689,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, hpte[1] = cpu_to_be64(r); } } - unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); + unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); - if (is_mmio_hpte(v, pte)) + if (is_mmio_hpte(v, pte_r)) atomic64_inc(&kvm->arch.mmio_update); return H_SUCCESS; @@ -704,6 +717,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -821,10 +838,16 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; + u64 hp0, hp1; hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); - rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), - pte_index); + hp0 = be64_to_cpu(hptep[0]); + hp1 = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } + rb = compute_tlbie_rb(hp0, hp1, pte_index); do_tlbies(kvm, &rb, 1, 1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); @@ -834,9 +857,15 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, { unsigned long rb; unsigned char rbyte; + u64 hp0, hp1; - rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), - pte_index); + hp0 = be64_to_cpu(hptep[0]); + hp1 = be64_to_cpu(hptep[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + hp0 = hpte_new_to_old_v(hp0, hp1); + hp1 = hpte_new_to_old_r(hp1); + } + rb = compute_tlbie_rb(hp0, hp1, pte_index); rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; @@ -896,7 +925,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long avpn; __be64 *hpte; unsigned long mask, val; - unsigned long v, r; + unsigned long v, r, orig_v; /* Get page shift, work out hash and AVPN etc. 
*/ mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY; @@ -931,6 +960,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1])); /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -939,8 +970,12 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[i+1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } /* * Check the HPTE again, including base page size @@ -950,7 +985,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); - __unlock_hpte(&hpte[i], v); + __unlock_hpte(&hpte[i], orig_v); } if (val & HPTE_V_SECONDARY) @@ -978,7 +1013,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, { struct kvm *kvm = vcpu->kvm; long int index; - unsigned long v, r, gr; + unsigned long v, r, gr, orig_v; __be64 *hpte; unsigned long valid; struct revmap_entry *rev; @@ -1006,12 +1041,16 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return 0; /* for prot fault, HPTE disappeared */ } hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); - v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; r = be64_to_cpu(hpte[1]); + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + v = hpte_new_to_old_v(v, r); + r = hpte_new_to_old_r(r); + } rev = real_vmalloc_addr(&kvm->arch.revmap[index]); gr = rev->guest_rpte; - unlock_hpte(hpte, v); + unlock_hpte(hpte, orig_v); } /* For not found, if the HPTE is valid by now, retry the instruction */ -- cgit v1.2.3-59-g8ed1b From 7a84084c60545bc47f3339344f1af5f94599c966 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Nov 2016 22:25:20 +1100 Subject: KVM: PPC: Book3S HV: Set partition table rather than SDR1 on POWER9 On POWER9, the SDR1 register (hashed page table base address) is no longer used, and instead the hardware reads the HPT base address and size from the partition table. The partition table entry also contains the bits that specify the page size for the VRMA mapping, which were previously in the LPCR. The VPM0 bit of the LPCR is now reserved; the processor now always uses the VRMA (virtual real-mode area) mechanism for guest real-mode accesses in HPT mode, and the RMO (real-mode offset) mechanism has been dropped. When entering or exiting the guest, we now only have to set the LPIDR (logical partition ID register), not the SDR1 register. There is also no requirement now to transition via a reserved LPID value. 
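A minimal sketch of the partition-table dword-0 encoding described above, mirroring the arithmetic in the diff that follows; the SLB_VSID_* constants here are illustrative assumptions, not values taken from this patch:

#include <stdint.h>

#define SLB_VSID_L   0x0000000000000100ULL   /* assumed value of the L bit */
#define SLB_VSID_LP  0x0000000000000030ULL   /* assumed value of the LP field */

/* Compose dword 0 of a POWER9 partition-table entry for an HPT guest:
 * the VRMA page-size bits come from the SLB encoding, and HTABORG and
 * HTABSIZE come straight from the value previously written to SDR1. */
uint64_t patb_dword0(uint64_t vrma_slb_v, uint64_t sdr1)
{
        uint64_t dw0;

        dw0  = ((vrma_slb_v & SLB_VSID_L) >> 1) |
               ((vrma_slb_v & SLB_VSID_LP) << 1);
        dw0 |= sdr1;
        return dw0;     /* dword 1 stays 0: GR=0, other fields unused (UPRT=0) */
}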
Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 36 +++++++++++++++++++++++++++------ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10 ++++++--- 2 files changed, 37 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 02f9aedbc258..13b6e6154c90 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -3029,6 +3030,22 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu) return; } +static void kvmppc_setup_partition_table(struct kvm *kvm) +{ + unsigned long dw0, dw1; + + /* PS field - page size for VRMA */ + dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) | + ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1); + /* HTABSIZE and HTABORG fields */ + dw0 |= kvm->arch.sdr1; + + /* Second dword has GR=0; other fields are unused since UPRT=0 */ + dw1 = 0; + + mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1); +} + static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; @@ -3080,17 +3097,20 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) psize == 0x1000000)) goto out_srcu; - /* Update VRMASD field in the LPCR */ senc = slb_pgsize_encoding(psize); kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | (VRMA_VSID << SLB_VSID_SHIFT_1T); - /* the -4 is to account for senc values starting at 0x10 */ - lpcr = senc << (LPCR_VRMASD_SH - 4); - /* Create HPTEs in the hash page table for the VRMA */ kvmppc_map_vrma(vcpu, memslot, porder); - kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + /* Update VRMASD field in the LPCR */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* the -4 is to account for senc values starting at 0x10 */ + lpcr = senc << (LPCR_VRMASD_SH - 4); + kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); + } else { + kvmppc_setup_partition_table(kvm); + } /* Order updates to kvm->arch.lpcr etc. vs. 
hpte_setup_done */ smp_wmb(); @@ -3240,7 +3260,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, sizeof(kvm->arch.enabled_hcalls)); - kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); /* Init LPCR for virtual RMA mode */ kvm->arch.host_lpid = mfspr(SPRN_LPID); @@ -3253,6 +3274,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* On POWER8 turn on online bit to enable PURR/SPURR */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) lpcr |= LPCR_ONL; + /* On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed) */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + lpcr &= ~LPCR_VPM0; kvm->arch.lpcr = lpcr; /* diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 6f81adb112f1..b2d107d8d215 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -581,12 +581,14 @@ kvmppc_hv_entry: ld r9,VCORE_KVM(r5) /* pointer to struct kvm */ cmpwi r6,0 bne 10f - ld r6,KVM_SDR1(r9) lwz r7,KVM_LPID(r9) +BEGIN_FTR_SECTION + ld r6,KVM_SDR1(r9) li r0,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r0 ptesync mtspr SPRN_SDR1,r6 /* switch to partition page table */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync @@ -1552,12 +1554,14 @@ kvmhv_switch_to_host: beq 19f /* Primary thread switches back to host partition */ - ld r6,KVM_HOST_SDR1(r4) lwz r7,KVM_HOST_LPID(r4) +BEGIN_FTR_SECTION + ld r6,KVM_HOST_SDR1(r4) li r8,LPID_RSVD /* switch to reserved LPID */ mtspr SPRN_LPID,r8 ptesync - mtspr SPRN_SDR1,r6 /* switch to partition page table */ + mtspr SPRN_SDR1,r6 /* switch to host page table */ +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPID,r7 isync -- cgit v1.2.3-59-g8ed1b From 83677f551e0a6ad43061053e7d6208abcd2707f0 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 16 Nov 2016 22:33:27 +1100 Subject: KVM: PPC: Book3S HV: Adjust host/guest context switch for POWER9 Some special-purpose registers that were present and accessible by guests on POWER8 no longer exist on POWER9, so this adds feature sections to ensure that we don't try to context-switch them when going into or out of a guest on POWER9. These are all relatively obscure, rarely-used registers, but we had to context-switch them on POWER8 to avoid creating a covert channel. They are: SPMC1, SPMC2, MMCRS, CSIGR, TACR, TCSCR, and ACOP. 
Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 50 ++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b2d107d8d215..c7dd251ac05d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -752,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) BEGIN_FTR_SECTION ld r5, VCPU_MMCR + 24(r4) ld r6, VCPU_SIER(r4) + mtspr SPRN_MMCR2, r5 + mtspr SPRN_SIER, r6 +BEGIN_FTR_SECTION_NESTED(96) lwz r7, VCPU_PMC + 24(r4) lwz r8, VCPU_PMC + 28(r4) ld r9, VCPU_MMCR + 32(r4) - mtspr SPRN_MMCR2, r5 - mtspr SPRN_SIER, r6 mtspr SPRN_SPMC1, r7 mtspr SPRN_SPMC2, r8 mtspr SPRN_MMCRS, r9 +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_MMCR0, r3 isync @@ -815,20 +817,22 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_EBBHR, r8 ld r5, VCPU_EBBRR(r4) ld r6, VCPU_BESCR(r4) - ld r7, VCPU_CSIGR(r4) - ld r8, VCPU_TACR(r4) + lwz r7, VCPU_GUEST_PID(r4) + ld r8, VCPU_WORT(r4) mtspr SPRN_EBBRR, r5 mtspr SPRN_BESCR, r6 - mtspr SPRN_CSIGR, r7 - mtspr SPRN_TACR, r8 + mtspr SPRN_PID, r7 + mtspr SPRN_WORT, r8 +BEGIN_FTR_SECTION ld r5, VCPU_TCSCR(r4) ld r6, VCPU_ACOP(r4) - lwz r7, VCPU_GUEST_PID(r4) - ld r8, VCPU_WORT(r4) + ld r7, VCPU_CSIGR(r4) + ld r8, VCPU_TACR(r4) mtspr SPRN_TCSCR, r5 mtspr SPRN_ACOP, r6 - mtspr SPRN_PID, r7 - mtspr SPRN_WORT, r8 + mtspr SPRN_CSIGR, r7 + mtspr SPRN_TACR, r8 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 8: /* @@ -1343,20 +1347,22 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r8, VCPU_EBBHR(r9) mfspr r5, SPRN_EBBRR mfspr r6, SPRN_BESCR - mfspr r7, SPRN_CSIGR - mfspr r8, SPRN_TACR + mfspr r7, SPRN_PID + mfspr r8, SPRN_WORT std r5, VCPU_EBBRR(r9) std r6, VCPU_BESCR(r9) - std r7, VCPU_CSIGR(r9) - std r8, VCPU_TACR(r9) + stw r7, VCPU_GUEST_PID(r9) + std r8, VCPU_WORT(r9) +BEGIN_FTR_SECTION mfspr r5, SPRN_TCSCR mfspr r6, SPRN_ACOP - mfspr r7, SPRN_PID - mfspr r8, SPRN_WORT + mfspr r7, SPRN_CSIGR + mfspr r8, SPRN_TACR std r5, VCPU_TCSCR(r9) std r6, VCPU_ACOP(r9) - stw r7, VCPU_GUEST_PID(r9) - std r8, VCPU_WORT(r9) + std r7, VCPU_CSIGR(r9) + std r8, VCPU_TACR(r9) +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. 
@@ -1365,12 +1371,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX, r0 - mtspr SPRN_TCSCR, r0 mtspr SPRN_WORT, r0 +BEGIN_FTR_SECTION + mtspr SPRN_TCSCR, r0 /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ li r0, 1 sldi r0, r0, 31 mtspr SPRN_MMCRS, r0 +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) 8: /* Save and reset AMR and UAMOR before turning on the MMU */ @@ -1504,15 +1512,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) stw r8, VCPU_PMC + 20(r9) BEGIN_FTR_SECTION mfspr r5, SPRN_SIER + std r5, VCPU_SIER(r9) +BEGIN_FTR_SECTION_NESTED(96) mfspr r6, SPRN_SPMC1 mfspr r7, SPRN_SPMC2 mfspr r8, SPRN_MMCRS - std r5, VCPU_SIER(r9) stw r6, VCPU_PMC + 24(r9) stw r7, VCPU_PMC + 28(r9) std r8, VCPU_MMCR + 32(r9) lis r4, 0x8000 mtspr SPRN_MMCRS, r4 +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: /* Clear out SLB */ -- cgit v1.2.3-59-g8ed1b From e9cf1e085647b433ccd98582681b17121ecfdc21 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Nov 2016 13:11:42 +1100 Subject: KVM: PPC: Book3S HV: Add new POWER9 guest-accessible SPRs This adds code to handle two new guest-accessible special-purpose registers on POWER9: TIDR (thread ID register) and PSSCR (processor stop status and control register). They are context-switched between host and guest, and the guest values can be read and set via the one_reg interface. The PSSCR contains some fields which are guest-accessible and some which are only accessible in hypervisor mode. We only allow the guest-accessible fields to be read or set by userspace. Signed-off-by: Paul Mackerras --- Documentation/virtual/kvm/api.txt | 2 ++ arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/uapi/asm/kvm.h | 4 ++++ arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/kvm/book3s_hv.c | 12 ++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 39 +++++++++++++++++++++++++++++++-- 6 files changed, 59 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7596e9fdf06..8a5ebd118313 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2023,6 +2023,8 @@ registers, find a list below: PPC | KVM_REG_PPC_WORT | 64 PPC | KVM_REG_PPC_SPRG9 | 64 PPC | KVM_REG_PPC_DBSR | 32 + PPC | KVM_REG_PPC_TIDR | 64 + PPC | KVM_REG_PPC_PSSCR | 64 PPC | KVM_REG_PPC_TM_GPR0 | 64 ... 
PPC | KVM_REG_PPC_TM_GPR31 | 64 diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 0e584ee57730..9556de61b1bb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -517,6 +517,8 @@ struct kvm_vcpu_arch { ulong tcscr; ulong acop; ulong wort; + ulong tid; + ulong psscr; ulong shadow_srr1; #endif u32 vrsave; /* also USPRG0 */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0fb1326c3ea2..3603b6f51b11 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -573,6 +573,10 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) #define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb) +/* POWER9 registers */ +#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc) +#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) + /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c833d88c423d..a4f6d5e32a81 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -548,6 +548,8 @@ int main(void) DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr)); DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop)); DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort)); + DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid)); + DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr)); DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map)); DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 13b6e6154c90..14eeacc82336 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1230,6 +1230,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_WORT: *val = get_reg_val(id, vcpu->arch.wort); break; + case KVM_REG_PPC_TIDR: + *val = get_reg_val(id, vcpu->arch.tid); + break; + case KVM_REG_PPC_PSSCR: + *val = get_reg_val(id, vcpu->arch.psscr); + break; case KVM_REG_PPC_VPA_ADDR: spin_lock(&vcpu->arch.vpa_update_lock); *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); @@ -1431,6 +1437,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_WORT: vcpu->arch.wort = set_reg_val(id, *val); break; + case KVM_REG_PPC_TIDR: + vcpu->arch.tid = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PSSCR: + vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS; + break; case KVM_REG_PPC_VPA_ADDR: addr = set_reg_val(id, *val); r = -EINVAL; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c7dd251ac05d..499be609c80e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -523,6 +523,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) * * *****************************************************************************/ +/* Stack frame offsets */ +#define STACK_SLOT_TID (112-16) +#define STACK_SLOT_PSSCR (112-24) + .global kvmppc_hv_entry kvmppc_hv_entry: @@ -700,6 +704,14 @@ kvmppc_got_guest: mtspr SPRN_PURR,r7 mtspr SPRN_SPURR,r8 + /* Save host values of some registers */ +BEGIN_FTR_SECTION + mfspr r5, SPRN_TIDR + mfspr r6, SPRN_PSSCR + std r5, STACK_SLOT_TID(r1) + std r6, 
STACK_SLOT_PSSCR(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + BEGIN_FTR_SECTION /* Set partition DABR */ /* Do this before re-enabling PMU to avoid P7 DABR corruption bug */ @@ -824,6 +836,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_PID, r7 mtspr SPRN_WORT, r8 BEGIN_FTR_SECTION + /* POWER8-only registers */ ld r5, VCPU_TCSCR(r4) ld r6, VCPU_ACOP(r4) ld r7, VCPU_CSIGR(r4) @@ -832,7 +845,14 @@ BEGIN_FTR_SECTION mtspr SPRN_ACOP, r6 mtspr SPRN_CSIGR, r7 mtspr SPRN_TACR, r8 -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) +FTR_SECTION_ELSE + /* POWER9-only registers */ + ld r5, VCPU_TID(r4) + ld r6, VCPU_PSSCR(r4) + oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */ + mtspr SPRN_TIDR, r5 + mtspr SPRN_PSSCR, r6 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: /* @@ -1362,7 +1382,14 @@ BEGIN_FTR_SECTION std r6, VCPU_ACOP(r9) std r7, VCPU_CSIGR(r9) std r8, VCPU_TACR(r9) -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) +FTR_SECTION_ELSE + mfspr r5, SPRN_TIDR + mfspr r6, SPRN_PSSCR + std r5, VCPU_TID(r9) + rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */ + rotldi r6, r6, 60 + std r6, VCPU_PSSCR(r9) +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) /* * Restore various registers to 0, where non-zero values * set by the guest could disrupt the host. @@ -1531,6 +1558,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) slbia ptesync + /* Restore host values of some registers */ +BEGIN_FTR_SECTION + ld r5, STACK_SLOT_TID(r1) + ld r6, STACK_SLOT_PSSCR(r1) + mtspr SPRN_TIDR, r5 + mtspr SPRN_PSSCR, r6 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do -- cgit v1.2.3-59-g8ed1b From 7c5b06cadf274f2867523c1130c11387545f808e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Nov 2016 08:28:51 +1100 Subject: KVM: PPC: Book3S HV: Adapt TLB invalidations to work on POWER9 POWER9 adds new capabilities to the tlbie (TLB invalidate entry) and tlbiel (local tlbie) instructions. Both instructions get a set of new parameters (RIC, PRS and R) which appear as bits in the instruction word. The tlbiel instruction now has a second register operand, which contains a PID and/or LPID value if needed, and should otherwise contain 0. This adapts KVM-HV's usage of tlbie and tlbiel to work on POWER9 as well as older processors. Since we only handle HPT guests so far, we need RIC=0 PRS=0 R=0, which ends up with the same instruction word as on previous processors, so we don't need to conditionally execute different instructions depending on the processor. The local flush on first entry to a guest in book3s_hv_rmhandlers.S is a loop which depends on the number of TLB sets. Rather than using feature sections to set the number of iterations based on which CPU we're on, we now work out this number at VM creation time and store it in the kvm_arch struct. That will make it possible to get the number from the device tree in future, which will help with compatibility with future processors. Since mmu_partition_table_set_entry() does a global flush of the whole LPID, we don't need to do the TLB flush on first entry to the guest on each processor. Therefore we don't set all bits in the tlb_need_flush bitmap on VM startup on POWER9. 
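The per-set flush that consumes this count can be pictured roughly as below (C pseudocode standing in for the assembly loop; the stride to the next set index is an assumption, not taken from this patch):

#include <stdint.h>

static void tlbiel(uint64_t rb, uint64_t rs)
{
        /* stand-in for the real tlbiel instruction */
        (void)rb; (void)rs;
}

/* Flush every congruence class of the local TLB, one tlbiel per set. */
static void flush_local_tlb_sets(unsigned int tlb_sets)
{
        uint64_t rb = 0x800;            /* IS field = 0b10 */
        unsigned int i;

        for (i = 0; i < tlb_sets; i++) {
                tlbiel(rb, 0);          /* RS operand is 0 for the P9 form */
                rb += 0x1000;           /* assumed stride to the next set index */
        }
}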
Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv.c | 17 ++++++++++++++++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 10 ++++++++-- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++------ 5 files changed, 28 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9556de61b1bb..f15713ae3f34 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -244,6 +244,7 @@ struct kvm_arch_memory_slot { struct kvm_arch { unsigned int lpid; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + unsigned int tlb_sets; unsigned long hpt_virt; struct revmap_entry *revmap; atomic64_t mmio_update; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a4f6d5e32a81..195a9fc8f81c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -487,6 +487,7 @@ int main(void) /* book3s */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets)); DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1)); DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid)); DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 14eeacc82336..0a90fa44f555 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3265,8 +3265,11 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) * Since we don't flush the TLB when tearing down a VM, * and this lpid might have previously been used, * make sure we flush on each core before running the new VM. + * On POWER9, the tlbie in mmu_partition_table_set_entry() + * does this flush for us. */ - cpumask_setall(&kvm->arch.need_tlb_flush); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + cpumask_setall(&kvm->arch.need_tlb_flush); /* Start out with the default set of hcalls enabled */ memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, @@ -3291,6 +3294,17 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) lpcr &= ~LPCR_VPM0; kvm->arch.lpcr = lpcr; + /* + * Work out how many sets the TLB has, for the use of + * the TLB invalidation loop in book3s_hv_rmhandlers.S. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */ + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */ + else + kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */ + /* * Track that we now have a HV mode VM active. This blocks secondary * CPU threads from coming online. @@ -3733,3 +3747,4 @@ module_exit(kvmppc_book3s_exit_hv); MODULE_LICENSE("GPL"); MODULE_ALIAS_MISCDEV(KVM_MINOR); MODULE_ALIAS("devname:kvm"); + diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 59d345883d9a..378b962bcf2e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -425,13 +425,18 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, { long i; + /* + * We use the POWER9 5-operand versions of tlbie and tlbiel here. + * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores + * the RS field, this is backwards-compatible with P7 and P8. 
+ */ if (global) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) - asm volatile(PPC_TLBIE(%1,%0) : : + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : "r" (rbvalues[i]), "r" (kvm->arch.lpid)); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; @@ -439,7 +444,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, if (need_sync) asm volatile("ptesync" : : : "memory"); for (i = 0; i < npages; ++i) - asm volatile("tlbiel %0" : : "r" (rbvalues[i])); + asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : : + "r" (rbvalues[i]), "r" (0)); asm volatile("ptesync" : : : "memory"); } } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 499be609c80e..598d8281fc05 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -613,12 +613,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) stdcx. r7,0,r6 bne 23b /* Flush the TLB of any entries for this LPID */ - /* use arch 2.07S as a proxy for POWER8 */ -BEGIN_FTR_SECTION - li r6,512 /* POWER8 has 512 sets */ -FTR_SECTION_ELSE - li r6,128 /* POWER7 has 128 sets */ -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S) + lwz r6,KVM_TLB_SETS(r9) + li r0,0 /* RS for P9 version of tlbiel */ mtctr r6 li r7,0x800 /* IS field = 0b10 */ ptesync -- cgit v1.2.3-59-g8ed1b From 1704a81ccebc69b5223220df97cde8a645271828 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Nov 2016 08:47:08 +1100 Subject: KVM: PPC: Book3S HV: Use msgsnd for IPIs to other cores on POWER9 On POWER9, the msgsnd instruction is able to send interrupts to other cores, as well as other threads on the local core. Since msgsnd is generally simpler and faster than sending an IPI via the XICS, we use msgsnd for all IPIs sent by KVM on POWER9. 
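A hedged sketch of the doorbell payload this builds (the type-field shift and the server-doorbell type value are assumptions, not taken from this patch):

#include <stdint.h>

#define DBELL_TYPE_SHIFT   27        /* assumed position of the message type field */
#define DBELL_TYPE_SERVER  0x05ULL   /* assumed "directed hypervisor doorbell" type */

/* Compose the register operand for msgsnd: the type field selects a server
 * doorbell, and the low bits carry the target's hardware thread id.  On
 * POWER8 the target had to be a sibling thread; on POWER9 it can be any cpu. */
uint64_t msgsnd_payload(uint32_t hard_cpu_id)
{
        return (DBELL_TYPE_SERVER << DBELL_TYPE_SHIFT) | hard_cpu_id;
}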
Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 11 ++++++++++- arch/powerpc/kvm/book3s_hv_builtin.c | 10 ++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0a90fa44f555..7a6a00a41b12 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -147,12 +147,21 @@ static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc, static bool kvmppc_ipi_thread(int cpu) { + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); + + /* On POWER9 we can use msgsnd to IPI any cpu */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + msg |= get_hard_smp_processor_id(cpu); + smp_mb(); + __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); + return true; + } + /* On POWER8 for IPIs to threads in the same core, use msgsnd */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) { preempt_disable(); if (cpu_first_thread_sibling(cpu) == cpu_first_thread_sibling(smp_processor_id())) { - unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); msg |= cpu_thread_in_core(cpu); smp_mb(); __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 90a0b274e699..e1e1ead1abb5 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -206,12 +206,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val) void kvmhv_rm_send_ipi(int cpu) { unsigned long xics_phys; + unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); - /* On POWER8 for IPIs to threads in the same core, use msgsnd */ + /* On POWER9 we can use msgsnd for any destination cpu. */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + msg |= get_hard_smp_processor_id(cpu); + __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); + return; + } + /* On POWER8 for IPIs to threads in the same core, use msgsnd. */ if (cpu_has_feature(CPU_FTR_ARCH_207S) && cpu_first_thread_sibling(cpu) == cpu_first_thread_sibling(raw_smp_processor_id())) { - unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER); msg |= cpu_thread_in_core(cpu); __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg)); return; -- cgit v1.2.3-59-g8ed1b From f725758b899f11cac6b375e332e092dc855b9210 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Nov 2016 09:02:08 +1100 Subject: KVM: PPC: Book3S HV: Use OPAL XICS emulation on POWER9 POWER9 includes a new interrupt controller, called XIVE, which is quite different from the XICS interrupt controller on POWER7 and POWER8 machines. KVM-HV accesses the XICS directly in several places in order to send and clear IPIs and handle interrupts from PCI devices being passed through to the guest. In order to make the transition to XIVE easier, OPAL firmware will include an emulation of XICS on top of XIVE. Access to the emulated XICS is via OPAL calls. The one complication is that the EOI (end-of-interrupt) function can now return a value indicating that another interrupt is pending; in this case, the XIVE will not signal an interrupt in hardware to the CPU, and software is supposed to acknowledge the new interrupt without waiting for another interrupt to be delivered in hardware. This adapts KVM-HV to use the OPAL calls on machines where there is no XICS hardware. When there is no XICS, we look for a device-tree node with "ibm,opal-intc" in its compatible property, which is how OPAL indicates that it provides XICS emulation. 
In order to handle the EOI return value, kvmppc_read_intr() has become kvmppc_read_one_intr(), with a boolean variable passed by reference which can be set by the EOI functions to indicate that another interrupt is pending. The new kvmppc_read_intr() keeps calling kvmppc_read_one_intr() until there are no more interrupts to process. The return value from kvmppc_read_intr() is the largest non-zero value of the returns from kvmppc_read_one_intr(). Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_ppc.h | 7 +++-- arch/powerpc/kvm/book3s_hv.c | 28 +++++++++++++++-- arch/powerpc/kvm/book3s_hv_builtin.c | 59 ++++++++++++++++++++++++++++++------ arch/powerpc/kvm/book3s_hv_rm_xics.c | 23 ++++++++++---- 4 files changed, 96 insertions(+), 21 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f6e49640dbe1..a5b94bed1423 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq, unsigned long host_irq); extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq, unsigned long host_irq); -extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr, - struct kvmppc_irq_map *irq_map, - struct kvmppc_passthru_irqmap *pimap); +extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr, + struct kvmppc_irq_map *irq_map, + struct kvmppc_passthru_irqmap *pimap, + bool *again); extern int h_ipi_redirect; #else static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap( diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7a6a00a41b12..aca9fbfaf16f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -55,6 +55,8 @@ #include #include #include +#include +#include #include #include #include @@ -63,6 +65,7 @@ #include #include #include +#include #include "book3s.h" @@ -172,8 +175,12 @@ static bool kvmppc_ipi_thread(int cpu) } #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) - if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) { - xics_wake_cpu(cpu); + if (cpu >= 0 && cpu < nr_cpu_ids) { + if (paca[cpu].kvm_hstate.xics_phys) { + xics_wake_cpu(cpu); + return true; + } + opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY); return true; } #endif @@ -3734,6 +3741,23 @@ static int kvmppc_book3s_init_hv(void) if (r) return r; + /* + * We need a way of accessing the XICS interrupt controller, + * either directly, via paca[cpu].kvm_hstate.xics_phys, or + * indirectly, via OPAL. 
+ */ +#ifdef CONFIG_SMP + if (!get_paca()->kvm_hstate.xics_phys) { + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc"); + if (!np) { + pr_err("KVM-HV: Cannot determine method for accessing XICS\n"); + return -ENODEV; + } + } +#endif + kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index e1e1ead1abb5..9e1223a81138 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -27,6 +27,7 @@ #include #include #include +#include #define KVM_CMA_CHUNK_ORDER 18 @@ -225,7 +226,11 @@ void kvmhv_rm_send_ipi(int cpu) /* Else poke the target with an IPI */ xics_phys = paca[cpu].kvm_hstate.xics_phys; - rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); + if (xics_phys) + rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY); + else + opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu), + IPI_PRIORITY); } /* @@ -336,7 +341,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap, * saved a copy of the XIRR in the PACA, it will be picked up by * the host ICP driver. */ -static int kvmppc_check_passthru(u32 xisr, __be32 xirr) +static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again) { struct kvmppc_passthru_irqmap *pimap; struct kvmppc_irq_map *irq_map; @@ -355,7 +360,7 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr) /* We're handling this interrupt, generic code doesn't need to */ local_paca->kvm_hstate.saved_xirr = 0; - return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap); + return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again); } #else @@ -374,14 +379,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) * -1 if there was a guest wakeup IPI (which has now been cleared) * -2 if there is PCI passthrough external interrupt that was handled */ +static long kvmppc_read_one_intr(bool *again); long kvmppc_read_intr(void) +{ + long ret = 0; + long rc; + bool again; + + do { + again = false; + rc = kvmppc_read_one_intr(&again); + if (rc && (ret == 0 || rc > ret)) + ret = rc; + } while (again); + return ret; +} + +static long kvmppc_read_one_intr(bool *again) { unsigned long xics_phys; u32 h_xirr; __be32 xirr; u32 xisr; u8 host_ipi; + int64_t rc; /* see if a host IPI is pending */ host_ipi = local_paca->kvm_hstate.host_ipi; @@ -390,8 +412,14 @@ long kvmppc_read_intr(void) /* Now read the interrupt from the ICP */ xics_phys = local_paca->kvm_hstate.xics_phys; - if (unlikely(!xics_phys)) - return 1; + if (!xics_phys) { + /* Use OPAL to read the XIRR */ + rc = opal_rm_int_get_xirr(&xirr, false); + if (rc < 0) + return 1; + } else { + xirr = _lwzcix(xics_phys + XICS_XIRR); + } /* * Save XIRR for later. Since we get control in reverse endian @@ -399,7 +427,6 @@ long kvmppc_read_intr(void) * host endian. Note that xirr is the value read from the * XIRR register, while h_xirr is the host endian version. */ - xirr = _lwzcix(xics_phys + XICS_XIRR); h_xirr = be32_to_cpu(xirr); local_paca->kvm_hstate.saved_xirr = h_xirr; xisr = h_xirr & 0xffffff; @@ -418,8 +445,16 @@ long kvmppc_read_intr(void) * If it is an IPI, clear the MFRR and EOI it. 
*/ if (xisr == XICS_IPI) { - _stbcix(xics_phys + XICS_MFRR, 0xff); - _stwcix(xics_phys + XICS_XIRR, xirr); + if (xics_phys) { + _stbcix(xics_phys + XICS_MFRR, 0xff); + _stwcix(xics_phys + XICS_XIRR, xirr); + } else { + opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff); + rc = opal_rm_int_eoi(h_xirr); + /* If rc > 0, there is another interrupt pending */ + *again = rc > 0; + } + /* * Need to ensure side effects of above stores * complete before proceeding. @@ -436,7 +471,11 @@ long kvmppc_read_intr(void) /* We raced with the host, * we need to resend that IPI, bummer */ - _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + if (xics_phys) + _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY); + else + opal_rm_int_set_mfrr(hard_smp_processor_id(), + IPI_PRIORITY); /* Let side effects complete */ smp_mb(); return 1; @@ -447,5 +486,5 @@ long kvmppc_read_intr(void) return -1; } - return kvmppc_check_passthru(xisr, xirr); + return kvmppc_check_passthru(xisr, xirr, again); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index f2d4487b137e..6a4c4d758a9e 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -71,7 +71,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) hcpu = hcore << threads_shift; kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu; smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION); - icp_native_cause_ipi_rm(hcpu); + if (paca[hcpu].kvm_hstate.xics_phys) + icp_native_cause_ipi_rm(hcpu); + else + opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu), + IPI_PRIORITY); } #else static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { } @@ -738,7 +742,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) unsigned long eoi_rc; -static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) +static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again) { unsigned long xics_phys; int64_t rc; @@ -752,7 +756,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr) /* EOI it */ xics_phys = local_paca->kvm_hstate.xics_phys; - _stwcix(xics_phys + XICS_XIRR, xirr); + if (xics_phys) { + _stwcix(xics_phys + XICS_XIRR, xirr); + } else { + rc = opal_rm_int_eoi(be32_to_cpu(xirr)); + *again = rc > 0; + } } static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu) @@ -810,9 +819,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc) } long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, - u32 xirr, + __be32 xirr, struct kvmppc_irq_map *irq_map, - struct kvmppc_passthru_irqmap *pimap) + struct kvmppc_passthru_irqmap *pimap, + bool *again) { struct kvmppc_xics *xics; struct kvmppc_icp *icp; @@ -826,7 +836,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, icp_rm_deliver_irq(xics, icp, irq); /* EOI the interrupt */ - icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr); + icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr, + again); if (check_too_hard(xics, icp) == H_TOO_HARD) return 2; -- cgit v1.2.3-59-g8ed1b From bf53c88e42ac5dfdef649888d01b3bc96375647b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Nov 2016 14:34:07 +1100 Subject: KVM: PPC: Book3S HV: Use stop instruction rather than nap on POWER9 POWER9 replaces the various power-saving mode instructions on POWER8 (doze, nap, sleep and rvwinkle) with a single "stop" instruction, plus a register, PSSCR, which controls the depth of the power-saving mode. 
This replaces the use of the nap instruction when threads are idle during guest execution with the stop instruction, and adds code to set PSSCR to a value which will allow an SMT mode switch while the thread is idle (given that the core as a whole won't be idle in these cases). Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 598d8281fc05..9338a818e05c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi r0, 0 beq 57f li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 - mfspr r4, SPRN_LPCR - rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) - mtspr SPRN_LPCR, r4 - isync - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b - nap - b . + mfspr r5, SPRN_LPCR + rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) + b kvm_nap_sequence 57: li r0, 0 stbx r0, r3, r4 @@ -2256,6 +2248,21 @@ BEGIN_FTR_SECTION ori r5, r5, LPCR_PECEDH rlwimi r5, r3, 0, LPCR_PECEDP END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) + +kvm_nap_sequence: /* desired LPCR value in r5 */ +BEGIN_FTR_SECTION + /* + * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset) + * enable state loss = 1 (allow SMT mode switch) + * requested level = 0 (just stop dispatching) + */ + lis r3, (PSSCR_EC | PSSCR_ESL)@h + mtspr SPRN_PSSCR, r3 + /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */ + li r4, LPCR_PECE_HVEE@higher + sldi r4, r4, 32 + or r5, r5, r4 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r5 isync li r0, 0 @@ -2264,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ld r0, HSTATE_SCRATCH0(r13) 1: cmpd r0, r0 bne 1b +BEGIN_FTR_SECTION nap +FTR_SECTION_ELSE + PPC_STOP +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) b . 33: mr r4, r3 -- cgit v1.2.3-59-g8ed1b From 84f7139c064ed740d183ae535bda2f6d7ffc0d57 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 22 Nov 2016 14:30:14 +1100 Subject: KVM: PPC: Book3S HV: Enable hypervisor virtualization interrupts while in guest The new XIVE interrupt controller on POWER9 can direct external interrupts to the hypervisor or the guest. The interrupts directed to the hypervisor are controlled by an LPCR bit called LPCR_HVICE, and come in as a "hypervisor virtualization interrupt". This sets the LPCR bit so that hypervisor virtualization interrupts can occur while we are in the guest. We then also need to cope with exiting the guest because of a hypervisor virtualization interrupt. 
Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_asm.h | 1 + arch/powerpc/kvm/book3s_hv.c | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 05cabed3d1bd..09a802bb702f 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -99,6 +99,7 @@ #define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40 #define BOOK3S_INTERRUPT_HMI 0xe60 #define BOOK3S_INTERRUPT_H_DOORBELL 0xe80 +#define BOOK3S_INTERRUPT_H_VIRT 0xea0 #define BOOK3S_INTERRUPT_PERFMON 0xf00 #define BOOK3S_INTERRUPT_ALTIVEC 0xf20 #define BOOK3S_INTERRUPT_VSX 0xf40 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index aca9fbfaf16f..b6bc867dfc61 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -962,6 +962,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOK3S_INTERRUPT_EXTERNAL: case BOOK3S_INTERRUPT_H_DOORBELL: + case BOOK3S_INTERRUPT_H_VIRT: vcpu->stat.ext_intr_exits++; r = RESUME_GUEST; break; @@ -3305,9 +3306,15 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) /* On POWER8 turn on online bit to enable PURR/SPURR */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) lpcr |= LPCR_ONL; - /* On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed) */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + /* + * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed) + * Set HVICE bit to enable hypervisor virtualization interrupts. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { lpcr &= ~LPCR_VPM0; + lpcr |= LPCR_HVICE; + } + kvm->arch.lpcr = lpcr; /* -- cgit v1.2.3-59-g8ed1b From 45c940ba490df28cb87b993981a5f63df6bbb8db Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 18 Nov 2016 17:43:30 +1100 Subject: KVM: PPC: Book3S HV: Treat POWER9 CPU threads as independent subcores With POWER9, each CPU thread has its own MMU context and can be in the host or a guest independently of the other threads; there is still however a restriction that all threads must use the same type of address translation, either radix tree or hashed page table (HPT). Since we only support HPT guests on a HPT host at this point, we can treat the threads as being independent, and avoid all of the work of coordinating the CPU threads. To make this simpler, we introduce a new threads_per_vcore() function that returns 1 on POWER9 and threads_per_subcore on POWER7/8, and use that instead of threads_per_subcore or threads_per_core in various places. This also changes the value of the KVM_CAP_PPC_SMT capability on POWER9 systems from 4 to 1, so that userspace will not try to create VMs with multiple vcpus per vcore. (If userspace did create a VM that thought it was in an SMT mode, the VM might try to use the msgsndp instruction, which will not work as expected. In future it may be possible to trap and emulate msgsndp in order to allow VMs to think they are in an SMT mode, if only for the purpose of allowing migration from POWER8 systems.) With all this, we can now run guests on POWER9 as long as the host is running with HPT translation. Since userspace currently has no way to request radix tree translation for the guest, the guest has no choice but to use HPT translation. 
Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 49 +++++++++++++++++++++++++++++++++----------- arch/powerpc/kvm/powerpc.c | 11 ++++++---- 2 files changed, 44 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b6bc867dfc61..94294c3efb91 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1576,6 +1576,20 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +/* + * On POWER9, threads are independent and can be in different partitions. + * Therefore we consider each thread to be a subcore. + * There is a restriction that all threads have to be in the same + * MMU mode (radix or HPT), unfortunately, but since we only support + * HPT guests on a HPT host so far, that isn't an impediment yet. + */ +static int threads_per_vcore(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return 1; + return threads_per_subcore; +} + static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) { struct kvmppc_vcore *vcore; @@ -1590,7 +1604,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) init_swait_queue_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; + vcore->first_vcpuid = core * threads_per_vcore(); vcore->kvm = kvm; INIT_LIST_HEAD(&vcore->preempt_list); @@ -1753,7 +1767,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, int core; struct kvmppc_vcore *vcore; - core = id / threads_per_subcore; + core = id / threads_per_vcore(); if (core >= KVM_MAX_VCORES) goto out; @@ -1971,7 +1985,10 @@ static void kvmppc_wait_for_nap(void) { int cpu = smp_processor_id(); int i, loops; + int n_threads = threads_per_vcore(); + if (n_threads <= 1) + return; for (loops = 0; loops < 1000000; ++loops) { /* * Check if all threads are finished. @@ -1979,17 +1996,17 @@ static void kvmppc_wait_for_nap(void) * and the thread clears it when finished, so we look * for any threads that still have a non-NULL vcore ptr. */ - for (i = 1; i < threads_per_subcore; ++i) + for (i = 1; i < n_threads; ++i) if (paca[cpu + i].kvm_hstate.kvm_vcore) break; - if (i == threads_per_subcore) { + if (i == n_threads) { HMT_medium(); return; } HMT_low(); } HMT_medium(); - for (i = 1; i < threads_per_subcore; ++i) + for (i = 1; i < n_threads; ++i) if (paca[cpu + i].kvm_hstate.kvm_vcore) pr_err("KVM: CPU %d seems to be stuck\n", cpu + i); } @@ -2055,7 +2072,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_PREEMPT; vc->pcpu = smp_processor_id(); - if (vc->num_threads < threads_per_subcore) { + if (vc->num_threads < threads_per_vcore()) { spin_lock(&lp->lock); list_add_tail(&vc->preempt_list, &lp->list); spin_unlock(&lp->lock); @@ -2342,6 +2359,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) unsigned long cmd_bit, stat_bit; int pcpu, thr; int target_threads; + int controlled_threads; /* * Remove from the list any threads that have a signal pending @@ -2359,12 +2377,19 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) init_master_vcore(vc); vc->preempt_tb = TB_NIL; + /* + * Number of threads that we will be controlling: the same as + * the number of threads per subcore, except on POWER9, + * where it's 1 because the threads are (mostly) independent. 
+ */ + controlled_threads = threads_per_vcore(); + /* * Make sure we are running on primary threads, and that secondary * threads are offline. Also check if the number of threads in this * guest are greater than the current system threads per guest. */ - if ((threads_per_core > 1) && + if ((controlled_threads > 1) && ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { for_each_runnable_thread(i, vcpu, vc) { vcpu->arch.ret = -EBUSY; @@ -2380,7 +2405,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) */ init_core_info(&core_info, vc); pcpu = smp_processor_id(); - target_threads = threads_per_subcore; + target_threads = controlled_threads; if (target_smt_mode && target_smt_mode < target_threads) target_threads = target_smt_mode; if (vc->num_threads < target_threads) @@ -2416,7 +2441,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) smp_wmb(); } pcpu = smp_processor_id(); - for (thr = 0; thr < threads_per_subcore; ++thr) + for (thr = 0; thr < controlled_threads; ++thr) paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip; /* Initiate micro-threading (split-core) if required */ @@ -2526,7 +2551,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) } /* Let secondaries go back to the offline loop */ - for (i = 0; i < threads_per_subcore; ++i) { + for (i = 0; i < controlled_threads; ++i) { kvmppc_release_hwthread(pcpu + i); if (sip && sip->napped[i]) kvmppc_ipi_thread(pcpu + i); @@ -3392,9 +3417,9 @@ static int kvmppc_core_check_processor_compat_hv(void) !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; /* - * Disable KVM for Power9, untill the required bits merged. + * Disable KVM for Power9 in radix mode. */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) + if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled()) return -EIO; return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 70963c845e96..b5e470571470 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -548,10 +548,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: - if (hv_enabled) - r = threads_per_subcore; - else - r = 0; + r = 0; + if (hv_enabled) { + if (cpu_has_feature(CPU_FTR_ARCH_300)) + r = 1; + else + r = threads_per_subcore; + } break; case KVM_CAP_PPC_RMA: r = 0; -- cgit v1.2.3-59-g8ed1b From 2ee13be34b135957733b84ef5f7bd30c80ec3c42 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Mon, 14 Nov 2016 11:35:08 +1100 Subject: KVM: PPC: Book3S HV: Update kvmppc_set_arch_compat() for ISA v3.00 The function kvmppc_set_arch_compat() is used to determine the value of the processor compatibility register (PCR) for a guest running in a given compatibility mode. There is currently no support for v3.00 of the ISA. Add support for v3.00 of the ISA which adds an ISA v2.07 compatilibity mode to the PCR. We also add a check to ensure the processor we are running on is capable of emulating the chosen processor (for example a POWER7 cannot emulate a POWER8, similarly with a POWER8 and a POWER9). Based on work by: Paul Mackerras [paulus@ozlabs.org - moved dummy PCR_ARCH_300 definition here; set guest_pcr_bit when arch_compat == 0, added comment.] 
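A worked example of the "subtract the bits" trick used below to build the PCR value (a sketch; the numeric values follow the PCR_ARCH_* definitions earlier in this series, with 0x10 as the dummy v3.00 bit):

#include <assert.h>

int main(void)
{
        unsigned long host_pcr_bit  = 0x10;   /* host is ISA v3.00 (dummy bit) */
        unsigned long guest_pcr_bit = 0x2;    /* guest asked for v2.05 compat */

        /* Because the architecture bits are consecutive powers of two,
         * host - guest sets every bit from the guest level up to, but not
         * including, the host level: here ARCH_207 | ARCH_206 | ARCH_205. */
        unsigned long pcr = host_pcr_bit - guest_pcr_bit;
        assert(pcr == (0x8 | 0x4 | 0x2));
        return 0;
}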
Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 94294c3efb91..7ee77fe88356 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -316,41 +316,54 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } +/* Dummy value used in computing PCR value below */ +#define PCR_ARCH_300 (PCR_ARCH_207 << 1) + static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { - unsigned long pcr = 0; + unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; struct kvmppc_vcore *vc = vcpu->arch.vcore; + /* We can (emulate) our own architecture version and anything older */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + host_pcr_bit = PCR_ARCH_300; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + host_pcr_bit = PCR_ARCH_207; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + host_pcr_bit = PCR_ARCH_206; + else + host_pcr_bit = PCR_ARCH_205; + + /* Determine lowest PCR bit needed to run guest in given PVR level */ + guest_pcr_bit = host_pcr_bit; if (arch_compat) { switch (arch_compat) { case PVR_ARCH_205: - /* - * If an arch bit is set in PCR, all the defined - * higher-order arch bits also have to be set. - */ - pcr = PCR_ARCH_206 | PCR_ARCH_205; + guest_pcr_bit = PCR_ARCH_205; break; case PVR_ARCH_206: case PVR_ARCH_206p: - pcr = PCR_ARCH_206; + guest_pcr_bit = PCR_ARCH_206; break; case PVR_ARCH_207: + guest_pcr_bit = PCR_ARCH_207; + break; + case PVR_ARCH_300: + guest_pcr_bit = PCR_ARCH_300; break; default: return -EINVAL; } - - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) { - /* POWER7 can't emulate POWER8 */ - if (!(pcr & PCR_ARCH_206)) - return -EINVAL; - pcr &= ~PCR_ARCH_206; - } } + /* Check requested PCR bits don't exceed our capabilities */ + if (guest_pcr_bit > host_pcr_bit) + return -EINVAL; + spin_lock(&vc->lock); vc->arch_compat = arch_compat; - vc->pcr = pcr; + /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */ + vc->pcr = host_pcr_bit - guest_pcr_bit; spin_unlock(&vc->lock); return 0; -- cgit v1.2.3-59-g8ed1b From e2702871b4b70a39e08c46744a8fa16e281120aa Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 24 Nov 2016 14:10:43 +1100 Subject: KVM: PPC: Book3S HV: Fix compilation with unusual configurations This adds the "again" parameter to the dummy version of kvmppc_check_passthru(), so that it matches the real version. This fixes compilation with CONFIG_BOOK3S_64_HV set but CONFIG_KVM_XICS=n. This includes asm/smp.h in book3s_hv_builtin.c to fix compilation with CONFIG_SMP=n. The explicit inclusion is necessary to provide definitions of hard_smp_processor_id() and get_hard_smp_processor_id() in UP configs. 
Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_builtin.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 9e1223a81138..11561f0ef83a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -28,6 +28,7 @@ #include #include #include +#include #define KVM_CMA_CHUNK_ORDER 18 @@ -364,7 +365,7 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again) } #else -static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr) +static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again) { return 1; } -- cgit v1.2.3-59-g8ed1b From a8acaece5d88db234d0b82b8692dea15d602f622 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Wed, 23 Nov 2016 16:14:07 +1100 Subject: KVM: PPC: Correctly report KVM_CAP_PPC_ALLOC_HTAB At present KVM on powerpc always reports KVM_CAP_PPC_ALLOC_HTAB as enabled. However, the ioctl() it advertises (KVM_PPC_ALLOCATE_HTAB) only actually works on KVM HV. On KVM PR it will fail with ENOTTY. QEMU already has a workaround for this, so it's not breaking things in practice, but it would be better to advertise this correctly. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b5e470571470..efd1183a6b16 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: - case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: @@ -545,6 +544,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #endif r = 1; break; + + case KVM_CAP_PPC_ALLOC_HTAB: + r = hv_enabled; + break; #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: -- cgit v1.2.3-59-g8ed1b From 307d93e476a340116cbddd1d3d7edf9b3cdd7506 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Oct 2016 11:53:20 +1100 Subject: KVM: PPC: Book3S HV: Use generic kvm module parameters The previous patch exported the variables which back the module parameters of the generic kvm module. Now use these variables in the kvm-hv module so that any change to the generic module parameters will also have the same effect for the kvm-hv module. This removes the duplication of the kvm module parameters which was redundant and should reduce confusion when tuning them. 
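For context, the exported generic parameters this patch starts using are assumed to look roughly as follows; this is a sketch of the referenced "previous patch", not part of this series, and the exact declarations may differ:

/* include/linux/kvm_host.h (assumed) */
extern unsigned int halt_poll_ns;		/* global maximum halt-poll interval */
extern unsigned int halt_poll_ns_grow;		/* factor the interval grows by */
extern unsigned int halt_poll_ns_shrink;	/* factor it shrinks by; 0 means reset */

/* virt/kvm/kvm_main.c (assumed) */
#include <linux/module.h>
#include <linux/kvm_host.h>

unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT;
module_param(halt_poll_ns, uint, 0644);
EXPORT_SYMBOL_GPL(halt_poll_ns);

With a single definition and a single /sys/module/kvm/parameters entry, tuning halt polling then affects kvm and kvm-hv consistently.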
Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7ee77fe88356..f0d22ced3e1f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -108,23 +108,6 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core"); #endif -/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */ -static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT; -module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns"); - -/* Factor by which the vcore halt poll interval is grown, default is to double - */ -static unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, int, S_IRUGO); -MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by"); - -/* Factor by which the vcore halt poll interval is shrunk, default is to reset - */ -static unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, int, S_IRUGO); -MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by"); - static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -2617,8 +2600,8 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc) else vc->halt_poll_ns *= halt_poll_ns_grow; - if (vc->halt_poll_ns > halt_poll_max_ns) - vc->halt_poll_ns = halt_poll_max_ns; + if (vc->halt_poll_ns > halt_poll_ns) + vc->halt_poll_ns = halt_poll_ns; } static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) @@ -2728,15 +2711,15 @@ out: } /* Adjust poll time */ - if (halt_poll_max_ns) { + if (halt_poll_ns) { if (block_ns <= vc->halt_poll_ns) ; /* We slept and blocked for longer than the max halt time */ - else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns) + else if (vc->halt_poll_ns && block_ns > halt_poll_ns) shrink_halt_poll_ns(vc); /* We slept and our poll time is too small */ - else if (vc->halt_poll_ns < halt_poll_max_ns && - block_ns < halt_poll_max_ns) + else if (vc->halt_poll_ns < halt_poll_ns && + block_ns < halt_poll_ns) grow_halt_poll_ns(vc); } else vc->halt_poll_ns = 0; -- cgit v1.2.3-59-g8ed1b From e03f3921e597cbcc6880033e5c52fa1db524f88b Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Oct 2016 11:53:21 +1100 Subject: KVM: PPC: Book3S HV: Add check for module parameter halt_poll_ns The kvm module parameter halt_poll_ns defines the global maximum halt polling interval and can be dynamically changed by writing to the /sys/module/kvm/parameters/halt_poll_ns sysfs file. However in kvm-hv this module parameter value is only ever checked when we grow the current polling interval for the given vcore. This means that if we decrease the halt_poll_ns value below the current polling interval we won't see any effect unless we try to grow the polling interval above the new max at some point or it happens to be shrunk below the halt_poll_ns value. Update the halt polling code so that we always check for a new module param value of halt_poll_ns and set the current halt polling interval to it if it's currently greater than the new max. This means that it's redundant to also perform this check in the grow_halt_poll_ns() function now. 
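A stand-alone illustration of the behaviour change, with placeholder values rather than kernel code: once the clamp runs on every adjustment pass, lowering the module parameter is reflected in a vcore's poll interval immediately, not only when the interval next tries to grow.

#include <stdio.h>

static unsigned int halt_poll_ns = 500000;	/* stands in for the module parameter */

static void adjust_poll_time(unsigned int *vc_halt_poll_ns)
{
	/* grow/shrink decisions elided; only the new unconditional clamp is shown */
	if (*vc_halt_poll_ns > halt_poll_ns)
		*vc_halt_poll_ns = halt_poll_ns;
}

int main(void)
{
	unsigned int vc_halt_poll_ns = 400000;	/* current vcore polling interval */

	halt_poll_ns = 10000;	/* admin writes a smaller value via sysfs */
	adjust_poll_time(&vc_halt_poll_ns);

	printf("vcore poll interval: %u ns\n", vc_halt_poll_ns);	/* prints 10000 */
	return 0;
}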
Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f0d22ced3e1f..0b0ca272a13c 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2599,9 +2599,6 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns = 10000; else vc->halt_poll_ns *= halt_poll_ns_grow; - - if (vc->halt_poll_ns > halt_poll_ns) - vc->halt_poll_ns = halt_poll_ns; } static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) @@ -2721,6 +2718,8 @@ out: else if (vc->halt_poll_ns < halt_poll_ns && block_ns < halt_poll_ns) grow_halt_poll_ns(vc); + if (vc->halt_poll_ns > halt_poll_ns) + vc->halt_poll_ns = halt_poll_ns; } else vc->halt_poll_ns = 0; -- cgit v1.2.3-59-g8ed1b From f4944613ad1ab6760589d5791488be1236c07fcc Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Oct 2016 11:53:22 +1100 Subject: KVM: PPC: Decrease the powerpc default halt poll max value KVM_HALT_POLL_NS_DEFAULT is an arch specific constant which sets the default value of the halt_poll_ns kvm module parameter which determines the global maximum halt polling interval. The current value for powerpc is 500000 (500us) which means that any repetitive workload with a period of less than that can drive the cpu usage to 100% where it may have been mostly idle without halt polling. This presents the possibility of a large increase in power usage with a comparatively small performance benefit. Reduce the default to 10000 (10us) and a user can tune this themselves to set their affinity for halt polling based on the trade off between power and performance which they are willing to make. Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f15713ae3f34..e59b172666cd 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -48,7 +48,7 @@ #ifdef CONFIG_KVM_MMIO #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#define KVM_HALT_POLL_NS_DEFAULT 500000 +#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */ /* These values are internal and can be increased later */ #define KVM_NR_IRQCHIPS 1 -- cgit v1.2.3-59-g8ed1b From 908a09359ef4ed9e9ca1147b9d35f829d7e42a74 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Fri, 14 Oct 2016 11:53:23 +1100 Subject: KVM: PPC: Book3S HV: Comment style and print format fixups Fix comment block to match kernel comment style. Fix print format from signed to unsigned. 
Signed-off-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 3 ++- arch/powerpc/kvm/trace_hv.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0b0ca272a13c..be8f83c999f3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2609,7 +2609,8 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc) vc->halt_poll_ns /= halt_poll_ns_shrink; } -/* Check to see if any of the runnable vcpus on the vcore have pending +/* + * Check to see if any of the runnable vcpus on the vcore have pending * exceptions or are no longer ceded */ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc) diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index fb21990c0fb4..ebc6dd449556 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup, __entry->tgid = current->tgid; ), - TP_printk("%s time %lld ns, tgid=%d", + TP_printk("%s time %llu ns, tgid=%d", __entry->waited ? "wait" : "poll", __entry->ns, __entry->tgid) ); -- cgit v1.2.3-59-g8ed1b From e34af7849014f1d80899b811cf9021588cb8dd88 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 1 Dec 2016 14:03:46 +1100 Subject: KVM: PPC: Book3S: Move prototypes for KVM functions into kvm_ppc.h This moves the prototypes for functions that are only called from assembler code out of asm/asm-prototypes.h into asm/kvm_ppc.h. The prototypes were added in commit ebe4535fbe7a ("KVM: PPC: Book3S HV: sparse: prototypes for functions called from assembler", 2016-10-10), but given that the functions are KVM functions, having them in a KVM header will be better for long-term maintenance. 
Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/asm-prototypes.h | 44 ------------------------------- arch/powerpc/include/asm/kvm_ppc.h | 42 +++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_64_vio_hv.c | 1 - arch/powerpc/kvm/book3s_hv_builtin.c | 1 - arch/powerpc/kvm/book3s_hv_ras.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 1 - arch/powerpc/kvm/book3s_hv_rm_xics.c | 1 - 7 files changed, 43 insertions(+), 49 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 6c853bcd11fa..d1492736d852 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -14,9 +14,6 @@ #include #include -#ifdef CONFIG_KVM -#include -#endif #include @@ -112,45 +109,4 @@ void early_setup_secondary(void); /* time */ void accumulate_stolen_time(void); -/* kvm */ -#ifdef CONFIG_KVM -long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, - unsigned long ioba, unsigned long tce); -long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, - unsigned long liobn, unsigned long ioba, - unsigned long tce_list, unsigned long npages); -long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, - unsigned long liobn, unsigned long ioba, - unsigned long tce_value, unsigned long npages); -long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, - unsigned int yield_count); -long kvmppc_h_random(struct kvm_vcpu *vcpu); -void kvmhv_commence_exit(int trap); -long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); -void kvmppc_subcore_enter_guest(void); -void kvmppc_subcore_exit_guest(void); -long kvmppc_realmode_hmi_handler(void); -long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel); -long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn); -long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); -long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va); -long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index); -long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index); -long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index); -long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, - unsigned long slb_v, unsigned int status, bool data); -unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); -int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, - unsigned long mfrr); -int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); -int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); -#endif - #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a5b94bed1423..2da67bf1f2ec 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -510,6 +510,48 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } #endif +/* + * Prototypes for functions called only from assembler code. + * Having prototypes reduces sparse errors. 
+ */ +long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, + unsigned long ioba, unsigned long tce); +long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_list, unsigned long npages); +long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, + unsigned long liobn, unsigned long ioba, + unsigned long tce_value, unsigned long npages); +long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target, + unsigned int yield_count); +long kvmppc_h_random(struct kvm_vcpu *vcpu); +void kvmhv_commence_exit(int trap); +long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu); +void kvmppc_subcore_enter_guest(void); +void kvmppc_subcore_exit_guest(void); +long kvmppc_realmode_hmi_handler(void); +long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel); +long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn); +long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu); +long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long va); +long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index); +long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, + unsigned long slb_v, unsigned int status, bool data); +unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu); +int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, + unsigned long mfrr); +int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); +int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr); + /* * Host-side operations we want to set up while running in real * mode in the guest operating on the xics. 
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 66e98276d93e..e4c4ea973e57 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -39,7 +39,6 @@ #include #include #include -#include #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 11561f0ef83a..5bb24be0b346 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index be1cee5dc032..7ef0993214f3 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include /* SRR1 bits for machine check on POWER7 */ #define SRR1_MC_LDSTERR (1ul << (63-42)) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 378b962bcf2e..9ef3c4be952f 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -21,7 +21,6 @@ #include #include #include -#include /* Translate address of a vmalloc'd thing to a linear map address */ static void *real_vmalloc_addr(void *x) diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 6a4c4d758a9e..06edc4366639 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "book3s_xics.h" -- cgit v1.2.3-59-g8ed1b