From e13dcc1ab593452b99c39de2cb588c39f6d6a7e9 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:49 +0000 Subject: PPC: epapr: create define for return code value of success Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index bf2c06c33871..c0c7adcd21e3 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -88,7 +88,8 @@ #define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) #define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) -/* epapr error codes */ +/* epapr return codes */ +#define EV_SUCCESS 0 #define EV_EPERM 1 /* Operation not permitted */ #define EV_ENOENT 2 /* Entry Not Found */ #define EV_EIO 3 /* I/O error occured */ -- cgit v1.2.3-59-g8ed1b From fdcf8bd7e711d4c0fe3ef624cfb5e3808149ff7f Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:50 +0000 Subject: KVM: PPC: use definitions in epapr header for hcalls Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_para.h | 21 +++++++++++---------- arch/powerpc/kernel/kvm.c | 2 +- arch/powerpc/kvm/powerpc.c | 10 +++++----- 3 files changed, 17 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index c18916bff689..a168ce37d85c 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -75,9 +75,10 @@ struct kvm_vcpu_arch_shared { }; #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ -#define HC_VENDOR_KVM (42 << 16) -#define HC_EV_SUCCESS 0 -#define HC_EV_UNIMPLEMENTED 12 + +#define KVM_HCALL_TOKEN(num) _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num) + +#include #define KVM_FEATURE_MAGIC_PAGE 1 @@ -121,7 +122,7 @@ static unsigned long kvm_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) { - return HC_EV_UNIMPLEMENTED; + return EV_UNIMPLEMENTED; } #endif @@ -132,7 +133,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) unsigned long out[8]; unsigned long r; - r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); *r2 = out[0]; return r; @@ -143,7 +144,7 @@ static inline long kvm_hypercall0(unsigned int nr) unsigned long in[8]; unsigned long out[8]; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) @@ -152,7 +153,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) unsigned long out[8]; in[0] = p1; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, @@ -163,7 +164,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, @@ -175,7 +176,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, in[0] = p1; in[1] = p2; in[2] = p3; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, @@ -189,7 +190,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, in[1] = p2; in[2] = p3; in[3] = p4; - return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr)); } diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 867db1de8949..a61b133c4f99 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -419,7 +419,7 @@ static void kvm_map_magic_page(void *data) in[0] = KVM_MAGIC_PAGE; in[1] = KVM_MAGIC_PAGE; - kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); *features = out[0]; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4d213b8b0fb5..0368a9391b21 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -67,18 +67,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) } switch (nr) { - case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: + case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE): { vcpu->arch.magic_page_pa = param1; vcpu->arch.magic_page_ea = param2; r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; - r = HC_EV_SUCCESS; + r = EV_SUCCESS; break; } - case HC_VENDOR_KVM | KVM_HC_FEATURES: - r = HC_EV_SUCCESS; + case KVM_HCALL_TOKEN(KVM_HC_FEATURES): + r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); @@ -87,7 +87,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) /* Second return value is in r4 */ break; default: - r = HC_EV_UNIMPLEMENTED; + r = EV_UNIMPLEMENTED; break; } -- cgit v1.2.3-59-g8ed1b From 784bafac79e7646e56f40998a6dde0e1ed5595f8 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:51 +0000 Subject: KVM: PPC: add pvinfo for hcall opcodes on e500mc/e5500 Signed-off-by: Liu Yu [stuart: factored this out from idle hcall support in host patch] Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0368a9391b21..a478e662b2bc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -751,9 +751,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) { + u32 inst_nop = 0x60000000; +#ifdef CONFIG_KVM_BOOKE_HV + u32 inst_sc1 = 0x44000022; + pvinfo->hcall[0] = inst_sc1; + pvinfo->hcall[1] = inst_nop; + pvinfo->hcall[2] = inst_nop; + pvinfo->hcall[3] = inst_nop; +#else u32 inst_lis = 0x3c000000; u32 inst_ori = 0x60000000; - u32 inst_nop = 0x60000000; u32 inst_sc = 0x44000002; u32 inst_imm_mask = 0xffff; @@ -770,6 +777,7 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); pvinfo->hcall[2] = inst_sc; pvinfo->hcall[3] = inst_nop; +#endif return 0; } -- cgit v1.2.3-59-g8ed1b From 9202e07636f0c4858ba6c30773a3f160b2b5659a Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:52 +0000 Subject: KVM: PPC: Add support for ePAPR idle hcall in host kernel And add a new flag definition in kvm_ppc_pvinfo to indicate whether the host supports the EV_IDLE hcall. Signed-off-by: Liu Yu [stuart.yoder@freescale.com: cleanup,fixes for conditions allowing idle] Signed-off-by: Stuart Yoder [agraf: fix typo] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 7 +++++-- arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/kvm/powerpc.c | 10 ++++++++-- include/linux/kvm.h | 2 ++ 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f6ec3a92e621..170afb1fbc2e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1194,12 +1194,15 @@ struct kvm_ppc_pvinfo { This ioctl fetches PV specific information that need to be passed to the guest using the device tree or other means from vm context. -For now the only implemented piece of information distributed here is an array -of 4 instructions that make up a hypercall. +The hcall array defines 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. +The flags bitmap is defined as: + + /* the host supports the ePAPR idle hcall + #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) 4.48 KVM_ASSIGN_PCI_DEVICE diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7e313f1ed183..13d6b7bf3b69 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -34,5 +34,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += epapr_hcalls.h generic-y += rwsem.h diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a478e662b2bc..dbf56e173c25 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,8 +38,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.shared->msr & MSR_WE) || - !!(v->arch.pending_exceptions) || + return !!(v->arch.pending_exceptions) || v->requests; } @@ -86,6 +85,11 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) /* Second return value is in r4 */ break; + case EV_HCALL_TOKEN(EV_IDLE): + r = EV_SUCCESS; + kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + break; default: r = EV_UNIMPLEMENTED; break; @@ -779,6 +783,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) pvinfo->hcall[3] = inst_nop; #endif + pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE; + return 0; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0a6d6ba44c85..4cb3761bebae 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -477,6 +477,8 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ -- cgit v1.2.3-59-g8ed1b From 2f979de8a716bdbdc9f4db532652fbca08ed710c Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:53 +0000 Subject: KVM: PPC: ev_idle hcall support for e500 guests Signed-off-by: Liu Yu [varun: 64-bit changes] Signed-off-by: Varun Sethi Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 11 ++++++----- arch/powerpc/kernel/epapr_hcalls.S | 28 ++++++++++++++++++++++++++++ arch/powerpc/kernel/epapr_paravirt.c | 11 ++++++++++- 3 files changed, 44 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index c0c7adcd21e3..833ce2c2d505 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -50,10 +50,6 @@ #ifndef _EPAPR_HCALLS_H #define _EPAPR_HCALLS_H -#include -#include -#include - #define EV_BYTE_CHANNEL_SEND 1 #define EV_BYTE_CHANNEL_RECEIVE 2 #define EV_BYTE_CHANNEL_POLL 3 @@ -109,6 +105,11 @@ #define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ #define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ +#ifndef __ASSEMBLY__ +#include +#include +#include + /* * Hypercall register clobber list * @@ -506,5 +507,5 @@ static inline unsigned int ev_idle(void) return r3; } - +#endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 697b390ebfd8..62c0dc237826 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -8,13 +8,41 @@ */ #include +#include #include #include #include #include #include +#include #include +/* epapr_ev_idle() was derived from e500_idle() */ +_GLOBAL(epapr_ev_idle) + CURRENT_THREAD_INFO(r3, r1) + PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */ + ori r4, r4,_TLF_NAPPING /* so when we take an exception */ + PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + + wrteei 1 + +idle_loop: + LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) + +.global epapr_ev_idle_start +epapr_ev_idle_start: + li r3, -1 + nop + nop + nop + + /* + * Guard against spurious wakeups from a hypervisor -- + * only interrupt will cause us to return to LR due to + * _TLF_NAPPING. + */ + b idle_loop + /* Hypercall entry point. Will be patched with device tree instructions. */ .global epapr_hypercall_start epapr_hypercall_start: diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 028aeae370b6..f3eab8594d9f 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -21,6 +21,10 @@ #include #include #include +#include + +extern void epapr_ev_idle(void); +extern u32 epapr_ev_idle_start[]; bool epapr_paravirt_enabled; @@ -41,8 +45,13 @@ static int __init epapr_paravirt_init(void) if (len % 4 || len > (4 * 4)) return -ENODEV; - for (i = 0; i < (len / 4); i++) + for (i = 0; i < (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); + patch_instruction(epapr_ev_idle_start + i, insts[i]); + } + + if (of_get_property(hyper_node, "has-idle", NULL)) + ppc_md.power_save = epapr_ev_idle; epapr_paravirt_enabled = true; -- cgit v1.2.3-59-g8ed1b From 40656397241860bb21f2802af17ac1de607fb7a9 Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Tue, 3 Jul 2012 05:48:54 +0000 Subject: PPC: select EPAPR_PARAVIRT for all users of epapr hcalls Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/platforms/Kconfig | 1 + drivers/tty/Kconfig | 1 + drivers/virt/Kconfig | 1 + 3 files changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index e7a896acd982..48a920d51489 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -90,6 +90,7 @@ config MPIC config PPC_EPAPR_HV_PIC bool default n + select EPAPR_PARAVIRT config MPIC_WEIRD bool diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 830cd62d8492..aa99cd26ba8b 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -358,6 +358,7 @@ config TRACE_SINK config PPC_EPAPR_HV_BYTECHAN tristate "ePAPR hypervisor byte channel driver" depends on PPC + select EPAPR_PARAVIRT help This driver creates /dev entries for each ePAPR hypervisor byte channel, thereby allowing applications to communicate with byte diff --git a/drivers/virt/Kconfig b/drivers/virt/Kconfig index 2dcdbc9364d8..99ebdde590f8 100644 --- a/drivers/virt/Kconfig +++ b/drivers/virt/Kconfig @@ -15,6 +15,7 @@ if VIRT_DRIVERS config FSL_HV_MANAGER tristate "Freescale hypervisor management driver" depends on FSL_SOC + select EPAPR_PARAVIRT help The Freescale hypervisor management driver provides several services to drivers and applications related to the Freescale hypervisor: -- cgit v1.2.3-59-g8ed1b From 305bcf26128e380bb1296d2802387659ab8b038e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 3 Jul 2012 05:48:55 +0000 Subject: powerpc/fsl-soc: use CONFIG_EPAPR_PARAVIRT for hcalls Signed-off-by: Scott Wood Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/sysdev/fsl_msi.c | 9 +++++++-- arch/powerpc/sysdev/fsl_soc.c | 2 ++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 6e097de00e09..7e2b2f2e3ecd 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -236,7 +236,6 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 intr_index; u32 have_shift = 0; struct fsl_msi_cascade_data *cascade_data; - unsigned int ret; cascade_data = irq_get_handler_data(irq); msi_data = cascade_data->msi_data; @@ -268,7 +267,9 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) case FSL_PIC_IP_IPIC: msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4); break; - case FSL_PIC_IP_VMPIC: +#ifdef CONFIG_EPAPR_PARAVIRT + case FSL_PIC_IP_VMPIC: { + unsigned int ret; ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value); if (ret) { pr_err("fsl-msi: fh_vmpic_get_msir() failed for " @@ -277,6 +278,8 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc) } break; } +#endif + } while (msir_value) { intr_index = ffs(msir_value) - 1; @@ -508,10 +511,12 @@ static const struct of_device_id fsl_of_msi_ids[] = { .compatible = "fsl,ipic-msi", .data = (void *)&ipic_msi_feature, }, +#ifdef CONFIG_EPAPR_PARAVIRT { .compatible = "fsl,vmpic-msi", .data = (void *)&vmpic_msi_feature, }, +#endif {} }; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index c449dbd1c938..97118dc3d285 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -253,6 +253,7 @@ struct platform_diu_data_ops diu_ops; EXPORT_SYMBOL(diu_ops); #endif +#ifdef CONFIG_EPAPR_PARAVIRT /* * Restart the current partition * @@ -278,3 +279,4 @@ void fsl_hv_halt(void) pr_info("hv exit\n"); fh_partition_stop(-1); } +#endif -- cgit v1.2.3-59-g8ed1b From 8e525d59d024f54b88a038faac38f76b9094774e Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:56 +0000 Subject: PPC: Don't use hardcoded opcode for ePAPR hcall invocation Signed-off-by: Liu Yu Signed-off-by: Stuart Yoder Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 22 ++++++++++---------- arch/powerpc/include/asm/fsl_hcalls.h | 36 ++++++++++++++++----------------- 2 files changed, 29 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 833ce2c2d505..b8d94459a929 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -195,7 +195,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt, r5 = priority; r6 = destination; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6) : : EV_HCALL_CLOBBERS4 ); @@ -224,7 +224,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt, r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6) : : EV_HCALL_CLOBBERS4 ); @@ -254,7 +254,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt, r3 = interrupt; r4 = mask; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -279,7 +279,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt, r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -307,7 +307,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt) r11 = EV_HCALL_TOKEN(EV_INT_EOI); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -346,7 +346,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle, r7 = be32_to_cpu(p[2]); r8 = be32_to_cpu(p[3]); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8) : : EV_HCALL_CLOBBERS6 @@ -385,7 +385,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle, r3 = handle; r4 = *count; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8) : : EV_HCALL_CLOBBERS6 @@ -423,7 +423,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle, r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5) : : EV_HCALL_CLOBBERS3 ); @@ -456,7 +456,7 @@ static inline unsigned int ev_int_iack(unsigned int handle, r11 = EV_HCALL_TOKEN(EV_INT_IACK); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -480,7 +480,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle) r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -500,7 +500,7 @@ static inline unsigned int ev_idle(void) r11 = EV_HCALL_TOKEN(EV_IDLE); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "=r" (r3) : : EV_HCALL_CLOBBERS1 ); diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h index 922d9b5fe3d5..3abb58394da4 100644 --- a/arch/powerpc/include/asm/fsl_hcalls.h +++ b/arch/powerpc/include/asm/fsl_hcalls.h @@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask) r11 = FH_HCALL_TOKEN(FH_SEND_NMI); r3 = vcpu_mask; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle, r9 = (uint32_t)propvalue_addr; r10 = *propvalue_len; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8), "+r" (r9), "+r" (r10) @@ -205,7 +205,7 @@ static inline unsigned int fh_partition_set_dtprop(int handle, r9 = (uint32_t)propvalue_addr; r10 = propvalue_len; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8), "+r" (r9), "+r" (r10) @@ -229,7 +229,7 @@ static inline unsigned int fh_partition_restart(unsigned int partition) r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -262,7 +262,7 @@ static inline unsigned int fh_partition_get_status(unsigned int partition, r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -295,7 +295,7 @@ static inline unsigned int fh_partition_start(unsigned int partition, r4 = entry_point; r5 = load; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5) : : EV_HCALL_CLOBBERS3 ); @@ -317,7 +317,7 @@ static inline unsigned int fh_partition_stop(unsigned int partition) r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP); r3 = partition; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -376,7 +376,7 @@ static inline unsigned int fh_partition_memcpy(unsigned int source, #endif r7 = count; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7) : : EV_HCALL_CLOBBERS5 @@ -399,7 +399,7 @@ static inline unsigned int fh_dma_enable(unsigned int liodn) r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE); r3 = liodn; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -421,7 +421,7 @@ static inline unsigned int fh_dma_disable(unsigned int liodn) r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE); r3 = liodn; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -447,7 +447,7 @@ static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt, r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR); r3 = interrupt; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "=r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -469,7 +469,7 @@ static inline unsigned int fh_system_reset(void) r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET); - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "=r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -506,7 +506,7 @@ static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize, r6 = addr_lo; r7 = peek; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7) : : EV_HCALL_CLOBBERS5 @@ -542,7 +542,7 @@ static inline unsigned int fh_get_core_state(unsigned int handle, r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -572,7 +572,7 @@ static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu) r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -597,7 +597,7 @@ static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu) r3 = handle; r4 = vcpu; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3), "+r" (r4) : : EV_HCALL_CLOBBERS2 ); @@ -618,7 +618,7 @@ static inline unsigned int fh_claim_device(unsigned int handle) r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); @@ -645,7 +645,7 @@ static inline unsigned int fh_partition_stop_dma(unsigned int handle) r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA); r3 = handle; - __asm__ __volatile__ ("sc 1" + asm volatile("bl epapr_hypercall_start" : "+r" (r11), "+r" (r3) : : EV_HCALL_CLOBBERS1 ); -- cgit v1.2.3-59-g8ed1b From 97c95059848358f1577f471ec47cf68690f996e4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 2 Aug 2012 15:10:00 +0200 Subject: KVM: PPC: PR: Use generic tracepoint for guest exit We want to have tracing information on guest exits for booke as well as book3s. Since most information is identical, use a common trace point. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 +- arch/powerpc/kvm/booke.c | 3 ++ arch/powerpc/kvm/trace.h | 79 ++++++++++++++++++++++++++++---------------- 3 files changed, 55 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 05c28f59f77f..7f0fe6f9e297 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -549,7 +549,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* We get here with MSR.EE=0, so enable it to be a nice citizen */ __hard_irq_enable(); - trace_kvm_book3s_exit(exit_nr, vcpu); + trace_kvm_exit(exit_nr, vcpu); preempt_enable(); kvm_resched(vcpu); switch (exit_nr) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index d25a097c852b..7ce2ed07831f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -39,6 +39,7 @@ #include "timing.h" #include "booke.h" +#include "trace.h" unsigned long kvmppc_booke_handlers; @@ -677,6 +678,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); + trace_kvm_exit(exit_nr, vcpu); + run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 877186b7b1c3..9fab6eddc7e4 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,6 +31,57 @@ TRACE_EVENT(kvm_ppc_instr, __entry->inst, __entry->pc, __entry->emulate) ); +TRACE_EVENT(kvm_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) +#ifdef CONFIG_KVM_BOOK3S_PR + __field( unsigned long, srr1 ) +#endif + __field( unsigned long, last_inst ) + ), + + TP_fast_assign( +#ifdef CONFIG_KVM_BOOK3S_PR + struct kvmppc_book3s_shadow_vcpu *svcpu; +#endif + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; +#ifdef CONFIG_KVM_BOOK3S_PR + svcpu = svcpu_get(vcpu); + __entry->srr1 = svcpu->shadow_srr1; + svcpu_put(svcpu); +#endif + __entry->last_inst = vcpu->arch.last_inst; + ), + + TP_printk("exit=0x%x" + " | pc=0x%lx" + " | msr=0x%lx" + " | dar=0x%lx" +#ifdef CONFIG_KVM_BOOK3S_PR + " | srr1=0x%lx" +#endif + " | last_inst=0x%lx" + , + __entry->exit_nr, + __entry->pc, + __entry->msr, + __entry->dar, +#ifdef CONFIG_KVM_BOOK3S_PR + __entry->srr1, +#endif + __entry->last_inst + ) +); + TRACE_EVENT(kvm_stlb_inval, TP_PROTO(unsigned int stlb_index), TP_ARGS(stlb_index), @@ -105,34 +156,6 @@ TRACE_EVENT(kvm_gtlb_write, #ifdef CONFIG_KVM_BOOK3S_PR -TRACE_EVENT(kvm_book3s_exit, - TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), - TP_ARGS(exit_nr, vcpu), - - TP_STRUCT__entry( - __field( unsigned int, exit_nr ) - __field( unsigned long, pc ) - __field( unsigned long, msr ) - __field( unsigned long, dar ) - __field( unsigned long, srr1 ) - ), - - TP_fast_assign( - struct kvmppc_book3s_shadow_vcpu *svcpu; - __entry->exit_nr = exit_nr; - __entry->pc = kvmppc_get_pc(vcpu); - __entry->dar = kvmppc_get_fault_dar(vcpu); - __entry->msr = vcpu->arch.shared->msr; - svcpu = svcpu_get(vcpu); - __entry->srr1 = svcpu->shadow_srr1; - svcpu_put(svcpu); - ), - - TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", - __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, - __entry->srr1) -); - TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), TP_ARGS(r, vcpu), -- cgit v1.2.3-59-g8ed1b From f4800b1f4d23156e9080a08d6114e5d8bb767964 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 7 Aug 2012 10:24:14 +0200 Subject: KVM: PPC: Expose SYNC cap based on mmu notifiers Semantically, the "SYNC" cap means that we have mmu notifiers available. Express this in our #ifdef'ery around the feature, so that we can be sure we don't miss out on ppc targets when they get their implementation. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index dbf56e173c25..45fe433316ea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -264,10 +264,16 @@ int kvm_dev_ioctl_check_extension(long ext) if (cpu_has_feature(CPU_FTR_ARCH_201)) r = 2; break; +#endif case KVM_CAP_SYNC_MMU: +#ifdef CONFIG_KVM_BOOK3S_64_HV r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; - break; +#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER) + r = 1; +#else + r = 0; #endif + break; case KVM_CAP_NR_VCPUS: /* * Recommending a number of CPUs is somewhat arbitrary; we -- cgit v1.2.3-59-g8ed1b From cf1c5ca47319d9eb49166859921822fea354d4b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 1 Aug 2012 12:56:51 +0200 Subject: KVM: PPC: BookE: Expose remote TLB flushes in debugfs We're already counting remote TLB flushes in a variable, but don't export it to user space yet. Do so, so we know what's going on. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7ce2ed07831f..1d4ce9a80f55 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "doorbell", VCPU_STAT(dbell_exits) }, { "guest doorbell", VCPU_STAT(gdbell_exits) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { NULL } }; -- cgit v1.2.3-59-g8ed1b From 2bb890f5ee79c85b9d3b7df37ecb639d8d4b961e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 2 Aug 2012 13:38:49 +0200 Subject: KVM: PPC: E500: Fix clear_tlb_refs Our mapping code assumes that TLB0 entries are always mapped. However, after calling clear_tlb_refs() this is no longer the case. Map them dynamically if we find an entry unmapped in TLB0. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index ff38b664195d..b56b6e14df6c 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1039,8 +1039,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, sesel = 0; /* unused */ priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, - &priv->ref, eaddr, &stlbe); + /* Only triggers after clear_tlb_refs */ + if (unlikely(!(priv->ref.flags & E500_TLB_VALID))) + kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); + else + kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, + &priv->ref, eaddr, &stlbe); break; case 1: { -- cgit v1.2.3-59-g8ed1b From 1340f3e8871b9f35b39c33d0140383c6c6c1f005 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 6 Aug 2012 00:04:14 +0000 Subject: KVM: PPC: Quieten message about allocating linear regions This is printed once for every RMA or HPT region that get preallocated. If one preallocates hundreds of such regions (in order to run hundreds of KVM guests), that gets rather painful, so make it a bit quieter. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index fb4eac290fef..ec0a9e5de100 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type) linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info)); for (i = 0; i < count; ++i) { linear = alloc_bootmem_align(size, size); - pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, - size >> 20); + pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear, + size >> 20); linear_info[i].base_virt = linear; linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT; linear_info[i].npages = npages; -- cgit v1.2.3-59-g8ed1b From 8043e494da644ec174f7df0b67f88ccf8777a1ce Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 10 Aug 2012 12:21:21 +0000 Subject: powerpc/epapr: export epapr_hypercall_start This fixes breakage introduced by the following commit: commit 6d2d82627f4f1e96a33664ace494fa363e0495cb Author: Liu Yu-B13201 Date: Tue Jul 3 05:48:56 2012 +0000 PPC: Don't use hardcoded opcode for ePAPR hcall invocation when a driver that uses ePAPR hypercalls is built as a module. Reported-by: Geert Uytterhoeven Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/ppc_ksyms.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 3e4031581c65..e597dde124e8 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_PPC32 extern void transfer_to_handler(void); @@ -192,3 +193,7 @@ EXPORT_SYMBOL(__arch_hweight64); #ifdef CONFIG_PPC_BOOK3S_64 EXPORT_SYMBOL_GPL(mmu_psize_defs); #endif + +#ifdef CONFIG_EPAPR_PARAVIRT +EXPORT_SYMBOL(epapr_hypercall_start); +#endif -- cgit v1.2.3-59-g8ed1b From 4ffc6356ec690f77f65b7b78e0047a3fe8316371 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 20:31:13 +0200 Subject: KVM: PPC: BookE: Add check_requests helper function We need a central place to check for pending requests in. Add one that only does the timer check we already do in a different place. Later, this central function can be extended by more checks. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1d4ce9a80f55..bcf87fe89179 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -419,13 +419,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) unsigned long *pending = &vcpu->arch.pending_exceptions; unsigned int priority; - if (vcpu->requests) { - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) { - smp_mb(); - update_timer_ints(vcpu); - } - } - priority = __ffs(*pending); while (priority < BOOKE_IRQPRIO_MAX) { if (kvmppc_booke_irqprio_deliver(vcpu, priority)) @@ -461,6 +454,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } +static void kvmppc_check_requests(struct kvm_vcpu *vcpu) +{ + if (vcpu->requests) { + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) + update_timer_ints(vcpu); + } +} + /* * Common checks before entering the guest world. Call with interrupts * disabled. @@ -485,6 +486,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } + smp_mb(); + if (vcpu->requests) { + /* Make sure we process requests preemptable */ + local_irq_enable(); + kvmppc_check_requests(vcpu); + local_irq_disable(); + continue; + } + if (kvmppc_core_prepare_to_enter(vcpu)) { /* interrupts got enabled in between, so we are back at square 1 */ -- cgit v1.2.3-59-g8ed1b From d69c6436443c05a64452054f51a79316297755f4 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 20:44:20 +0200 Subject: KVM: PPC: BookE: Add support for vcpu->mode Generic KVM code might want to know whether we are inside guest context or outside. It also wants to be able to push us out of guest context. Add support to the BookE code for the generic vcpu->mode field that describes the above states. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index bcf87fe89179..70a86c0a9d85 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -501,6 +501,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } + if (vcpu->mode == EXITING_GUEST_MODE) { + r = 1; + break; + } + + /* Going into guest context! Yay! */ + vcpu->mode = IN_GUEST_MODE; + smp_wmb(); + break; } @@ -572,6 +581,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvm_guest_exit(); out: + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); local_irq_enable(); return ret; } -- cgit v1.2.3-59-g8ed1b From 862d31f788f9a249f7656d02d8d4006e306108ce Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 31 Jul 2012 00:19:50 +0200 Subject: KVM: PPC: E500: Implement MMU notifiers The e500 target has lived without mmu notifiers ever since it got introduced, but fails for the user space check on them with hugetlbfs. So in order to get that one working, implement mmu notifiers in a reasonably dumb fashion and be happy. On embedded hardware, we almost never end up with mmu notifier calls, since most people don't overcommit. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 +- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/Kconfig | 2 ++ arch/powerpc/kvm/booke.c | 6 ++++ arch/powerpc/kvm/e500_tlb.c | 60 +++++++++++++++++++++++++++++++++---- 5 files changed, 65 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 28e8f5e5c63e..cea9d3aab71c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -46,7 +46,8 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#ifdef CONFIG_KVM_BOOK3S_64_HV +#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \ + defined(CONFIG_KVM_E500MC) #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e006f0bdea95..88de3146838b 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -104,6 +104,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index f4dacb9c57fa..40cad8c8bd0e 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -123,6 +123,7 @@ config KVM_E500V2 depends on EXPERIMENTAL && E500 && !PPC_E500MC select KVM select KVM_MMIO + select MMU_NOTIFIER ---help--- Support running unmodified E500 guest kernels in virtual machines on E500v2 host processors. @@ -138,6 +139,7 @@ config KVM_E500MC select KVM select KVM_MMIO select KVM_BOOKE_HV + select MMU_NOTIFIER ---help--- Support running unmodified E500MC/E5500 (32-bit) guest kernels in virtual machines on E500MC/E5500 host processors. diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 70a86c0a9d85..52f6cbb4923e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -459,6 +459,10 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu) if (vcpu->requests) { if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); +#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_core_flush_tlb(vcpu); +#endif } } @@ -579,6 +583,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif kvm_guest_exit(); + vcpu->mode = OUTSIDE_GUEST_MODE; + smp_wmb(); out: vcpu->mode = OUTSIDE_GUEST_MODE; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index b56b6e14df6c..de8ea29409f2 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -303,18 +303,15 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, ref->pfn = pfn; ref->flags = E500_TLB_VALID; - if (tlbe_is_writable(gtlbe)) + if (tlbe_is_writable(gtlbe)) { ref->flags |= E500_TLB_DIRTY; + kvm_set_pfn_dirty(pfn); + } } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { - if (ref->flags & E500_TLB_DIRTY) - kvm_release_pfn_dirty(ref->pfn); - else - kvm_release_pfn_clean(ref->pfn); - ref->flags = 0; } } @@ -357,6 +354,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) clear_tlb_privs(vcpu_e500); } +void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + clear_tlb_refs(vcpu_e500); + clear_tlb1_bitmap(vcpu_e500); +} + static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int eaddr, int as) { @@ -541,6 +545,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Clear i-cache for new pages */ kvmppc_mmu_flush_icache(pfn); + + /* Drop refcount on page, so that mmu notifiers can clear it */ + kvm_release_pfn_clean(pfn); } /* XXX only map the one-one case, for now use TLB0 */ @@ -1064,6 +1071,47 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); } +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + /* + * Flush all shadow tlb entries everywhere. This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; -- cgit v1.2.3-59-g8ed1b From 6346046c3a69edc9149311473b940f3af7c93752 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 8 Aug 2012 00:44:52 +0200 Subject: KVM: PPC: BookE: Add some more trace points Without trace points, debugging what exactly is going on inside guest code can be very tricky. Add a few more trace points at places that hopefully tell us more when things go wrong. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 3 ++ arch/powerpc/kvm/e500_tlb.c | 3 ++ arch/powerpc/kvm/trace.h | 71 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 52f6cbb4923e..00bcc57428c7 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -143,6 +143,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) { + trace_kvm_booke_queue_irqprio(vcpu, priority); set_bit(priority, &vcpu->arch.pending_exceptions); } @@ -457,6 +458,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) static void kvmppc_check_requests(struct kvm_vcpu *vcpu) { if (vcpu->requests) { + trace_kvm_check_requests(vcpu); + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index de8ea29409f2..1af6fab58995 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -312,6 +312,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) { if (ref->flags & E500_TLB_VALID) { + trace_kvm_booke206_ref_release(ref->pfn, ref->flags); ref->flags = 0; } } @@ -1075,6 +1076,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { + trace_kvm_unmap_hva(hva); + /* * Flush all shadow tlb entries everywhere. This is slow, but * we are 100% sure that we catch the to be unmapped page diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 9fab6eddc7e4..cb2780a42fd8 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -82,6 +82,21 @@ TRACE_EVENT(kvm_exit, ) ); +TRACE_EVENT(kvm_unmap_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("unmap hva 0x%lx\n", __entry->hva) +); + TRACE_EVENT(kvm_stlb_inval, TP_PROTO(unsigned int stlb_index), TP_ARGS(stlb_index), @@ -149,6 +164,24 @@ TRACE_EVENT(kvm_gtlb_write, __entry->word1, __entry->word2) ); +TRACE_EVENT(kvm_check_requests, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field( __u32, cpu_nr ) + __field( __u32, requests ) + ), + + TP_fast_assign( + __entry->cpu_nr = vcpu->vcpu_id; + __entry->requests = vcpu->requests; + ), + + TP_printk("vcpu=%x requests=%x", + __entry->cpu_nr, __entry->requests) +); + /************************************************************************* * Book3S trace points * @@ -418,6 +451,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write, __entry->mas2, __entry->mas7_3) ); +TRACE_EVENT(kvm_booke206_ref_release, + TP_PROTO(__u64 pfn, __u32 flags), + TP_ARGS(pfn, flags), + + TP_STRUCT__entry( + __field( __u64, pfn ) + __field( __u32, flags ) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->flags = flags; + ), + + TP_printk("pfn=%llx flags=%x", + __entry->pfn, __entry->flags) +); + +TRACE_EVENT(kvm_booke_queue_irqprio, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority), + TP_ARGS(vcpu, priority), + + TP_STRUCT__entry( + __field( __u32, cpu_nr ) + __field( __u32, priority ) + __field( unsigned long, pending ) + ), + + TP_fast_assign( + __entry->cpu_nr = vcpu->vcpu_id; + __entry->priority = priority; + __entry->pending = vcpu->arch.pending_exceptions; + ), + + TP_printk("vcpu=%x prio=%x pending=%lx", + __entry->cpu_nr, __entry->priority, __entry->pending) +); + #endif #endif /* _TRACE_KVM_H */ -- cgit v1.2.3-59-g8ed1b From 2d8185d4ee22f425001d28d1817fc8d478e6fa02 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 12:31:12 +0200 Subject: KVM: PPC: BookE: No duplicate request != 0 check We only call kvmppc_check_requests() when vcpu->requests != 0, so drop the redundant check in the function itself Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 00bcc57428c7..683cbd686d01 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -457,16 +457,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) static void kvmppc_check_requests(struct kvm_vcpu *vcpu) { - if (vcpu->requests) { - trace_kvm_check_requests(vcpu); + trace_kvm_check_requests(vcpu); - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) - update_timer_ints(vcpu); + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) + update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvmppc_core_flush_tlb(vcpu); + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_core_flush_tlb(vcpu); #endif - } } /* -- cgit v1.2.3-59-g8ed1b From 03d25c5bd5c3125055bd36f4813ddb817def19dd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 12:28:50 +0200 Subject: KVM: PPC: Use same kvmppc_prepare_to_enter code for booke and book3s_pr We need to do the same things when preparing to enter a guest for booke and book3s_pr cores. Fold the generic code into a generic function that both call. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 3 ++ arch/powerpc/kvm/book3s_pr.c | 22 ++++----------- arch/powerpc/kvm/booke.c | 58 +------------------------------------- arch/powerpc/kvm/powerpc.c | 57 +++++++++++++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 73 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 88de3146838b..59b7c87e47f7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -112,6 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong val); extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *val); +extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); @@ -150,6 +151,8 @@ extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); +extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7f0fe6f9e297..cae2defd1462 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -88,6 +88,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } +void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +{ +} + static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { ulong smsr = vcpu->arch.shared->msr; @@ -815,19 +819,9 @@ program_interrupt: * again due to a host external interrupt. */ __hard_irq_disable(); - if (signal_pending(current)) { - __hard_irq_enable(); -#ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM: Going back to host\n"); -#endif - vcpu->stat.signal_exits++; + if (kvmppc_prepare_to_enter(vcpu)) { run->exit_reason = KVM_EXIT_INTR; r = -EINTR; - } else { - /* In case an interrupt came in that was triggered - * from userspace (like DEC), we need to check what - * to inject now! */ - kvmppc_core_prepare_to_enter(vcpu); } } @@ -1029,8 +1023,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) goto out; } - kvmppc_core_prepare_to_enter(vcpu); - /* * Interrupts could be timers for the guest which we have to inject * again, so let's postpone them until we're in the guest and if we @@ -1038,9 +1030,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * a host external interrupt. */ __hard_irq_disable(); - - /* No need to go into the guest when all we do is going out */ - if (signal_pending(current)) { + if (kvmppc_prepare_to_enter(vcpu)) { __hard_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 683cbd686d01..4652e0bfa781 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -455,10 +455,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } -static void kvmppc_check_requests(struct kvm_vcpu *vcpu) +void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { - trace_kvm_check_requests(vcpu); - if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) @@ -467,60 +465,6 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu) #endif } -/* - * Common checks before entering the guest world. Call with interrupts - * disabled. - * - * returns !0 if a signal is pending and check_signal is true - */ -static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) -{ - int r = 0; - - WARN_ON_ONCE(!irqs_disabled()); - while (true) { - if (need_resched()) { - local_irq_enable(); - cond_resched(); - local_irq_disable(); - continue; - } - - if (signal_pending(current)) { - r = 1; - break; - } - - smp_mb(); - if (vcpu->requests) { - /* Make sure we process requests preemptable */ - local_irq_enable(); - kvmppc_check_requests(vcpu); - local_irq_disable(); - continue; - } - - if (kvmppc_core_prepare_to_enter(vcpu)) { - /* interrupts got enabled in between, so we - are back at square 1 */ - continue; - } - - if (vcpu->mode == EXITING_GUEST_MODE) { - r = 1; - break; - } - - /* Going into guest context! Yay! */ - vcpu->mode = IN_GUEST_MODE; - smp_wmb(); - - break; - } - - return r; -} - int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 45fe433316ea..153a26abc915 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -47,6 +47,63 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } +#ifndef CONFIG_KVM_BOOK3S_64_HV +/* + * Common checks before entering the guest world. Call with interrupts + * disabled. + * + * returns !0 if a signal is pending and check_signal is true + */ +int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) +{ + int r = 0; + + WARN_ON_ONCE(!irqs_disabled()); + while (true) { + if (need_resched()) { + local_irq_enable(); + cond_resched(); + local_irq_disable(); + continue; + } + + if (signal_pending(current)) { + r = 1; + break; + } + + smp_mb(); + if (vcpu->requests) { + /* Make sure we process requests preemptable */ + local_irq_enable(); + trace_kvm_check_requests(vcpu); + kvmppc_core_check_requests(vcpu); + local_irq_disable(); + continue; + } + + if (kvmppc_core_prepare_to_enter(vcpu)) { + /* interrupts got enabled in between, so we + are back at square 1 */ + continue; + } + + if (vcpu->mode == EXITING_GUEST_MODE) { + r = 1; + break; + } + + /* Going into guest context! Yay! */ + vcpu->mode = IN_GUEST_MODE; + smp_wmb(); + + break; + } + + return r; +} +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) { int nr = kvmppc_get_gpr(vcpu, 11); -- cgit v1.2.3-59-g8ed1b From 9b0cb3c808fef0d75d6f79ab9684246e6879f9c1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 10 Aug 2012 13:23:55 +0200 Subject: KVM: PPC: Book3s: PR: Add (dumb) MMU Notifier support Now that we have very simple MMU Notifier support for e500 in place, also add the same simple support to book3s. It gets us one step closer to actual fast support. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 +-- arch/powerpc/kvm/Kconfig | 1 + arch/powerpc/kvm/book3s_32_mmu_host.c | 1 + arch/powerpc/kvm/book3s_64_mmu_host.c | 1 + arch/powerpc/kvm/book3s_mmu_hpte.c | 5 ---- arch/powerpc/kvm/book3s_pr.c | 47 +++++++++++++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cea9d3aab71c..4a5ec8f573c7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -46,8 +46,7 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif -#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \ - defined(CONFIG_KVM_E500MC) +#if !defined(CONFIG_KVM_440) #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 40cad8c8bd0e..71f0cd9edf33 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -36,6 +36,7 @@ config KVM_BOOK3S_64_HANDLER config KVM_BOOK3S_PR bool select KVM_MMIO + select MMU_NOTIFIER config KVM_BOOK3S_32 tristate "KVM support for PowerPC book3s_32 processors" diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 837f13e7b6bf..9fac0101ffb9 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -254,6 +254,7 @@ next_pteg: kvmppc_mmu_hpte_cache_map(vcpu, pte); + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); out: return r; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0688b6b39585..6b2c80e49681 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -168,6 +168,7 @@ map_again: kvmppc_mmu_hpte_cache_map(vcpu, pte); } + kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT); out: return r; diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 41cb0017e757..2c86b0d63714 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) hlist_del_init_rcu(&pte->list_vpte); hlist_del_init_rcu(&pte->list_vpte_long); - if (pte->pte.may_write) - kvm_release_pfn_dirty(pte->pfn); - else - kvm_release_pfn_clean(pte->pfn); - spin_unlock(&vcpu3s->mmu_lock); vcpu3s->hpte_cache_count--; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index cae2defd1462..10f8217b8c38 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -90,8 +90,55 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + /* We misuse TLB_FLUSH to indicate that we want to clear + all shadow cache entries */ + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) + kvmppc_mmu_pte_flush(vcpu, 0, 0); } +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_unmap_hva(hva); + + /* + * Flush all shadow tlb entries everywhere. This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { ulong smsr = vcpu->arch.shared->msr; -- cgit v1.2.3-59-g8ed1b From e85ad380c6bf6dcd4776d313c81d16a6293db136 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:13:25 +0200 Subject: KVM: PPC: BookE: Drop redundant vcpu->mode set We only need to set vcpu->mode to outside once. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4652e0bfa781..492c343f598e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -528,8 +528,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif kvm_guest_exit(); - vcpu->mode = OUTSIDE_GUEST_MODE; - smp_wmb(); out: vcpu->mode = OUTSIDE_GUEST_MODE; -- cgit v1.2.3-59-g8ed1b From c63ddcb4540db95e5a4223cfa8cdbe6efbd5e386 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:27:49 +0200 Subject: KVM: PPC: Book3S: PR: Only do resched check once per exit Now that we use our generic exit helper, we can safely drop our previous kvm_resched that we used to trigger at the beginning of the exit handler function. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 10f8217b8c38..2c268a15b20f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -602,7 +602,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, trace_kvm_exit(exit_nr, vcpu); preempt_enable(); - kvm_resched(vcpu); + switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: { -- cgit v1.2.3-59-g8ed1b From 706fb730cb4f9db2e3de33391475dd0616c2c935 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:29:09 +0200 Subject: KVM: PPC: Exit guest context while handling exit The x86 implementation of KVM accounts for host time while processing guest exits. Do the same for us. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 ++ arch/powerpc/kvm/booke.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 2c268a15b20f..b4ae11ec068f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -601,6 +601,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, __hard_irq_enable(); trace_kvm_exit(exit_nr, vcpu); + kvm_guest_exit(); preempt_enable(); switch (exit_nr) { @@ -872,6 +873,7 @@ program_interrupt: } } + kvm_guest_enter(); trace_kvm_book3s_reenter(r, vcpu); return r; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 492c343f598e..887c7cc02146 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -650,6 +650,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); + kvm_guest_exit(); run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; @@ -952,6 +953,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } } + kvm_guest_enter(); + return r; } -- cgit v1.2.3-59-g8ed1b From 0652eaaebea0995b3236e51dec727d62264f4248 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 11:34:21 +0200 Subject: KVM: PPC: Book3S: PR: Indicate we're out of guest mode When going out of guest mode, indicate that we are in vcpu->mode. That way requests from other CPUs don't needlessly need to kick us to process them, because it'll just happen next time we enter the guest. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b4ae11ec068f..9430a362e5a3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1152,6 +1152,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif out: + vcpu->mode = OUTSIDE_GUEST_MODE; preempt_enable(); return ret; } -- cgit v1.2.3-59-g8ed1b From 24afa37b9c8f035d2fe2028e4824bc4e49bafe73 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 12 Aug 2012 12:42:30 +0200 Subject: KVM: PPC: Consistentify vcpu exit path When getting out of __vcpu_run, let's be consistent about the state we return in. We want to always * have IRQs enabled * have called kvm_guest_exit before Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 8 ++++++-- arch/powerpc/kvm/booke.c | 13 ++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 9430a362e5a3..3dec346c4b93 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -868,12 +868,15 @@ program_interrupt: */ __hard_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + /* local_irq_enable(); */ run->exit_reason = KVM_EXIT_INTR; r = -EINTR; + } else { + /* Going back to guest */ + kvm_guest_enter(); } } - kvm_guest_enter(); trace_kvm_book3s_reenter(r, vcpu); return r; @@ -1123,7 +1126,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = __kvmppc_vcpu_run(kvm_run, vcpu); - kvm_guest_exit(); + /* No need for kvm_guest_exit. It's done in handle_exit. + We also get here with interrupts enabled. */ current->thread.regs->msr = ext_msr; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 887c7cc02146..aae535f6d9de 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -481,6 +481,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + local_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -512,6 +513,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = __kvmppc_vcpu_run(kvm_run, vcpu); + /* No need for kvm_guest_exit. It's done in handle_exit. + We also get here with interrupts enabled. */ + #ifdef CONFIG_PPC_FPU kvmppc_save_guest_fp(vcpu); @@ -527,12 +531,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) current->thread.fpexc_mode = fpexc_mode; #endif - kvm_guest_exit(); - out: vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - local_irq_enable(); return ret; } @@ -947,14 +948,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (!(r & RESUME_HOST)) { local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { + local_irq_enable(); run->exit_reason = KVM_EXIT_INTR; r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); kvmppc_account_exit(vcpu, SIGNAL_EXITS); + } else { + /* Going back to guest */ + kvm_guest_enter(); } } - kvm_guest_enter(); - return r; } -- cgit v1.2.3-59-g8ed1b From bd2be6836ee493d41fe42367a2b129aa771185c1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 01:04:19 +0200 Subject: KVM: PPC: Book3S: PR: Rework irq disabling Today, we disable preemption while inside guest context, because we need to expose to the world that we are not in a preemptible context. However, during that time we already have interrupts disabled, which would indicate that we are in a non-preemptible context. The reason the checks for irqs_disabled() fail for us though is that we manually control hard IRQs and ignore all the lazy EE framework. Let's stop doing that. Instead, let's always use lazy EE to indicate when we want to disable IRQs, but do a special final switch that gets us into EE disabled, but soft enabled state. That way when we get back out of guest state, we are immediately ready to process interrupts. This simplifies the code drastically and reduces the time that we appear as preempt disabled. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 10 ++++++++++ arch/powerpc/kvm/book3s_pr.c | 21 +++++++-------------- arch/powerpc/kvm/book3s_rmhandlers.S | 15 ++++++++------- arch/powerpc/kvm/booke.c | 2 ++ arch/powerpc/kvm/powerpc.c | 14 ++++++++++++++ 5 files changed, 41 insertions(+), 21 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 59b7c87e47f7..545936428bf6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -234,5 +234,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn) } } +/* Please call after prepare_to_enter. This function puts the lazy ee state + back to normal mode, without actually enabling interrupts. */ +static inline void kvmppc_lazy_ee_enable(void) +{ +#ifdef CONFIG_PPC64 + /* Only need to enable IRQs by hard enabling them after this */ + local_paca->irq_happened = 0; + local_paca->soft_enabled = 1; +#endif +} #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3dec346c4b93..e737db8a5ca7 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, #define MSR_USER32 MSR_USER #define MSR_USER64 MSR_USER #define HW_PAGE_SIZE PAGE_SIZE -#define __hard_irq_disable local_irq_disable -#define __hard_irq_enable local_irq_enable #endif void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -597,12 +595,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; - /* We get here with MSR.EE=0, so enable it to be a nice citizen */ - __hard_irq_enable(); + /* We get here with MSR.EE=1 */ trace_kvm_exit(exit_nr, vcpu); kvm_guest_exit(); - preempt_enable(); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: @@ -854,7 +850,6 @@ program_interrupt: } } - preempt_disable(); if (!(r & RESUME_HOST)) { /* To avoid clobbering exit_reason, only check for signals if * we aren't already exiting to userspace for some other @@ -866,14 +861,15 @@ program_interrupt: * and if we really did time things so badly, then we just exit * again due to a host external interrupt. */ - __hard_irq_disable(); + local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { - /* local_irq_enable(); */ + local_irq_enable(); run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { /* Going back to guest */ kvm_guest_enter(); + kvmppc_lazy_ee_enable(); } } @@ -1066,8 +1062,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif ulong ext_msr; - preempt_disable(); - /* Check if we can run the vcpu at all */ if (!vcpu->arch.sane) { kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -1081,9 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * really did time things so badly, then we just exit again due to * a host external interrupt. */ - __hard_irq_disable(); + local_irq_disable(); if (kvmppc_prepare_to_enter(vcpu)) { - __hard_irq_enable(); + local_irq_enable(); kvm_run->exit_reason = KVM_EXIT_INTR; ret = -EINTR; goto out; @@ -1122,7 +1116,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - kvm_guest_enter(); + kvmppc_lazy_ee_enable(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); @@ -1157,7 +1151,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) out: vcpu->mode = OUTSIDE_GUEST_MODE; - preempt_enable(); return ret; } diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9ecf6e35cd8d..b2f8258b545a 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -170,20 +170,21 @@ kvmppc_handler_skip_ins: * Call kvmppc_handler_trampoline_enter in real mode * * On entry, r4 contains the guest shadow MSR + * MSR.EE has to be 0 when calling this function */ _GLOBAL(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) - li r9, MSR_RI - ori r9, r9, MSR_EE - andc r9, r5, r9 /* Clear EE and RI in MSR value */ li r6, MSR_IR | MSR_DR - ori r6, r6, MSR_EE - andc r6, r5, r6 /* Clear EE, DR and IR in MSR value */ - MTMSR_EERI(r9) /* Clear EE and RI in MSR */ - mtsrr0 r7 /* before we set srr0/1 */ + andc r6, r5, r6 /* Clear DR and IR in MSR value */ + /* + * Set EE in HOST_MSR so that it's enabled when we get into our + * C exit handler function + */ + ori r5, r5, MSR_EE + mtsrr0 r7 mtsrr1 r6 RFI diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index aae535f6d9de..2bd190c488ef 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -486,6 +486,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = -EINTR; goto out; } + kvmppc_lazy_ee_enable(); kvm_guest_enter(); @@ -955,6 +956,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, } else { /* Going back to guest */ kvm_guest_enter(); + kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 153a26abc915..266549979e9f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "timing.h" #include "../mm/mmu_decl.h" @@ -93,6 +94,19 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } +#ifdef CONFIG_PPC64 + /* lazy EE magic */ + hard_irq_disable(); + if (lazy_irq_pending()) { + /* Got an interrupt in between, try again */ + local_irq_enable(); + local_irq_disable(); + continue; + } + + trace_hardirqs_on(); +#endif + /* Going into guest context! Yay! */ vcpu->mode = IN_GUEST_MODE; smp_wmb(); -- cgit v1.2.3-59-g8ed1b From 3766a4c693358cff33441310413e3776dbbf8ef0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 01:24:01 +0200 Subject: KVM: PPC: Move kvm_guest_enter call into generic code We need to call kvm_guest_enter in booke and book3s, so move its call to generic code. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 -- arch/powerpc/kvm/booke.c | 2 -- arch/powerpc/kvm/powerpc.c | 3 +++ 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e737db8a5ca7..1ff0d6ccc589 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -867,8 +867,6 @@ program_interrupt: run->exit_reason = KVM_EXIT_INTR; r = -EINTR; } else { - /* Going back to guest */ - kvm_guest_enter(); kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2bd190c488ef..5e8dc1909130 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -954,8 +954,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); kvmppc_account_exit(vcpu, SIGNAL_EXITS); } else { - /* Going back to guest */ - kvm_guest_enter(); kvmppc_lazy_ee_enable(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 266549979e9f..6646574bf930 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -101,12 +101,15 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) /* Got an interrupt in between, try again */ local_irq_enable(); local_irq_disable(); + kvm_guest_exit(); continue; } trace_hardirqs_on(); #endif + kvm_guest_enter(); + /* Going into guest context! Yay! */ vcpu->mode = IN_GUEST_MODE; smp_wmb(); -- cgit v1.2.3-59-g8ed1b From 206c2ed7f1ea55222bde2954ee3d65c2e9cfb750 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:43:33 +0200 Subject: KVM: PPC: Ignore EXITING_GUEST_MODE mode We don't need to do anything when mode is EXITING_GUEST_MODE, because we essentially are outside of guest mode and did everything it asked us to do by the time we check it. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6646574bf930..dc86371b9953 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -89,11 +89,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) continue; } - if (vcpu->mode == EXITING_GUEST_MODE) { - r = 1; - break; - } - #ifdef CONFIG_PPC64 /* lazy EE magic */ hard_irq_disable(); -- cgit v1.2.3-59-g8ed1b From 7ee788556bf395a8ef413bea33494df29a3409e0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:44:41 +0200 Subject: KVM: PPC: Add return value in prepare_to_enter Our prepare_to_enter helper wants to be able to return in more circumstances to the host than only when an interrupt is pending. Broaden the interface a bit and move even more generic code to the generic helper. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 12 ++++++------ arch/powerpc/kvm/booke.c | 16 ++++++++-------- arch/powerpc/kvm/powerpc.c | 11 ++++++++--- 3 files changed, 22 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 1ff0d6ccc589..71fa0f1873b3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -589,6 +589,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { int r = RESUME_HOST; + int s; vcpu->stat.sum_exits++; @@ -862,10 +863,10 @@ program_interrupt: * again due to a host external interrupt. */ local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - run->exit_reason = KVM_EXIT_INTR; - r = -EINTR; + r = s; } else { kvmppc_lazy_ee_enable(); } @@ -1074,10 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * a host external interrupt. */ local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + ret = kvmppc_prepare_to_enter(vcpu); + if (ret <= 0) { local_irq_enable(); - kvm_run->exit_reason = KVM_EXIT_INTR; - ret = -EINTR; goto out; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5e8dc1909130..1917802463f5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -467,7 +467,7 @@ void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - int ret; + int ret, s; #ifdef CONFIG_PPC_FPU unsigned int fpscr; int fpexc_mode; @@ -480,10 +480,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - kvm_run->exit_reason = KVM_EXIT_INTR; - ret = -EINTR; + ret = s; goto out; } kvmppc_lazy_ee_enable(); @@ -642,6 +642,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int exit_nr) { int r = RESUME_HOST; + int s; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -948,11 +949,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, */ if (!(r & RESUME_HOST)) { local_irq_disable(); - if (kvmppc_prepare_to_enter(vcpu)) { + s = kvmppc_prepare_to_enter(vcpu); + if (s <= 0) { local_irq_enable(); - run->exit_reason = KVM_EXIT_INTR; - r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); - kvmppc_account_exit(vcpu, SIGNAL_EXITS); + r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); } else { kvmppc_lazy_ee_enable(); } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index dc86371b9953..0e2a98ab6a77 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -53,11 +53,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) * Common checks before entering the guest world. Call with interrupts * disabled. * - * returns !0 if a signal is pending and check_signal is true + * returns: + * + * == 1 if we're ready to go into guest state + * <= 0 if we need to go back to the host with return value */ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) { - int r = 0; + int r = 1; WARN_ON_ONCE(!irqs_disabled()); while (true) { @@ -69,7 +72,9 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) } if (signal_pending(current)) { - r = 1; + kvmppc_account_exit(vcpu, SIGNAL_EXITS); + vcpu->run->exit_reason = KVM_EXIT_INTR; + r = -EINTR; break; } -- cgit v1.2.3-59-g8ed1b From 7c973a2ebb8fb9c8ee2ae9647f9ad7b0ad58a3e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 12:50:35 +0200 Subject: KVM: PPC: Add return value to core_check_requests Requests may want to tell us that we need to go back into host state, so add a return value for the checks. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 2 +- arch/powerpc/kvm/book3s_pr.c | 6 +++++- arch/powerpc/kvm/booke.c | 6 +++++- arch/powerpc/kvm/powerpc.c | 6 ++++-- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 545936428bf6..3dfc437fb9d9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -112,7 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong val); extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *val); -extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu); +extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 71fa0f1873b3..b3c584f94cb3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -86,12 +86,16 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } -void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + int r = 1; /* Indicate we want to get back into the guest */ + /* We misuse TLB_FLUSH to indicate that we want to clear all shadow cache entries */ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvmppc_mmu_pte_flush(vcpu, 0, 0); + + return r; } /************* MMU Notifiers *************/ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1917802463f5..c36493087dbf 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -455,14 +455,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) return r; } -void kvmppc_core_check_requests(struct kvm_vcpu *vcpu) +int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { + int r = 1; /* Indicate we want to get back into the guest */ + if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) update_timer_ints(vcpu); #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvmppc_core_flush_tlb(vcpu); #endif + + return r; } int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0e2a98ab6a77..54b12af577d0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -83,9 +83,11 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) /* Make sure we process requests preemptable */ local_irq_enable(); trace_kvm_check_requests(vcpu); - kvmppc_core_check_requests(vcpu); + r = kvmppc_core_check_requests(vcpu); local_irq_disable(); - continue; + if (r > 0) + continue; + break; } if (kvmppc_core_prepare_to_enter(vcpu)) { -- cgit v1.2.3-59-g8ed1b From f61c94bb99ca4253ac5dd57750e1af209a4beb7a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 20:38:19 +0000 Subject: KVM: PPC: booke: Add watchdog emulation This patch adds the watchdog emulation in KVM. The watchdog emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl. The kernel timer are used for watchdog emulation and emulates h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how it is configured. Signed-off-by: Liu Yu Signed-off-by: Scott Wood [bharat.bhushan@freescale.com: reworked patch] Signed-off-by: Bharat Bhushan [agraf: adjust to new request framework] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 + arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/asm/reg_booke.h | 7 ++ arch/powerpc/kvm/book3s.c | 9 ++ arch/powerpc/kvm/booke.c | 155 +++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/booke_emulate.c | 8 ++ arch/powerpc/kvm/powerpc.c | 14 +++- include/linux/kvm.h | 2 + include/linux/kvm_host.h | 1 + 9 files changed, 199 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a5ec8f573c7..51b0ccd56769 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -471,6 +471,8 @@ struct kvm_vcpu_arch { ulong fault_esr; ulong queued_dear; ulong queued_esr; + spinlock_t wdt_lock; + struct timer_list wdt_timer; u32 tlbcfg[4]; u32 mmucfg; u32 epr; @@ -486,6 +488,7 @@ struct kvm_vcpu_arch { u8 osi_needed; u8 osi_enabled; u8 papr_enabled; + u8 watchdog_enabled; u8 sane; u8 cpu_type; u8 hcall_needed; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3dfc437fb9d9..c06a64b53362 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); extern void kvmppc_decrementer_func(unsigned long data); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); /* Core-specific hooks */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2d916c4982c5..e07e6af5e1ff 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -539,6 +539,13 @@ #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */ #define TCR_ARE 0x00400000 /* Auto Reload Enable */ +#ifdef CONFIG_E500 +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \ + (((tcr) & 0x1E0000) >> 15)) +#else +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30) +#endif + /* Bit definitions for the TSR. */ #define TSR_ENW 0x80000000 /* Enable Next Watchdog */ #define TSR_WIS 0x40000000 /* WDT Interrupt Status */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 3f2a8360c857..e94666566fa9 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c36493087dbf..09e8bf33a8c9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -209,6 +209,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG); +} + +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { #ifdef CONFIG_KVM_BOOKE_HV @@ -328,6 +338,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; break; + case BOOKE_IRQPRIO_WATCHDOG: case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_DBELL_CRIT: allowed = vcpu->arch.shared->msr & MSR_CE; @@ -407,12 +418,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, return allowed; } +/* + * Return the number of jiffies until the next timeout. If the timeout is + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA + * because the larger value can break the timer APIs. + */ +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) +{ + u64 tb, wdt_tb, wdt_ticks = 0; + u64 nr_jiffies = 0; + u32 period = TCR_GET_WP(vcpu->arch.tcr); + + wdt_tb = 1ULL << (63 - period); + tb = get_tb(); + /* + * The watchdog timeout will hapeen when TB bit corresponding + * to watchdog will toggle from 0 to 1. + */ + if (tb & wdt_tb) + wdt_ticks = wdt_tb; + + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1)); + + /* Convert timebase ticks to jiffies */ + nr_jiffies = wdt_ticks; + + if (do_div(nr_jiffies, tb_ticks_per_jiffy)) + nr_jiffies++; + + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA); +} + +static void arm_next_watchdog(struct kvm_vcpu *vcpu) +{ + unsigned long nr_jiffies; + unsigned long flags; + + /* + * If TSR_ENW and TSR_WIS are not set then no need to exit to + * userspace, so clear the KVM_REQ_WATCHDOG request. + */ + if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS)) + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); + + spin_lock_irqsave(&vcpu->arch.wdt_lock, flags); + nr_jiffies = watchdog_next_timeout(vcpu); + /* + * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA + * then do not run the watchdog timer as this can break timer APIs. + */ + if (nr_jiffies < NEXT_TIMER_MAX_DELTA) + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies); + else + del_timer(&vcpu->arch.wdt_timer); + spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); +} + +void kvmppc_watchdog_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + u32 tsr, new_tsr; + int final; + + do { + new_tsr = tsr = vcpu->arch.tsr; + final = 0; + + /* Time out event */ + if (tsr & TSR_ENW) { + if (tsr & TSR_WIS) + final = 1; + else + new_tsr = tsr | TSR_WIS; + } else { + new_tsr = tsr | TSR_ENW; + } + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr); + + if (new_tsr & TSR_WIS) { + smp_wmb(); + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * If this is final watchdog expiry and some action is required + * then exit to userspace. + */ + if (final && (vcpu->arch.tcr & TCR_WRC_MASK) && + vcpu->arch.watchdog_enabled) { + smp_wmb(); + kvm_make_request(KVM_REQ_WATCHDOG, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * Stop running the watchdog timer after final expiration to + * prevent the host from being flooded with timers if the + * guest sets a short period. + * Timers will resume when TSR/TCR is updated next time. + */ + if (!final) + arm_next_watchdog(vcpu); +} + static void update_timer_ints(struct kvm_vcpu *vcpu) { if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) kvmppc_core_queue_dec(vcpu); else kvmppc_core_dequeue_dec(vcpu); + + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS)) + kvmppc_core_queue_watchdog(vcpu); + else + kvmppc_core_dequeue_watchdog(vcpu); } static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) @@ -466,6 +586,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) kvmppc_core_flush_tlb(vcpu); #endif + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_WATCHDOG; + r = 0; + } + return r; } @@ -995,6 +1120,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return r; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + /* setup watchdog timer once */ + spin_lock_init(&vcpu->arch.wdt_lock); + setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, + (unsigned long)vcpu); + + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + del_timer_sync(&vcpu->arch.wdt_timer); +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; @@ -1090,7 +1230,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, } if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + u32 old_tsr = vcpu->arch.tsr; + vcpu->arch.tsr = sregs->u.e.tsr; + + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } @@ -1251,6 +1397,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) { vcpu->arch.tcr = new_tcr; + arm_next_watchdog(vcpu); update_timer_ints(vcpu); } @@ -1265,6 +1412,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) { clear_bits(tsr_bits, &vcpu->arch.tsr); + + /* + * We may have stopped the watchdog due to + * being stuck on final expiration. + */ + if (tsr_bits & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 12834bb608ab..5a66ade7fd17 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) kvmppc_clr_tsr_bits(vcpu, spr_val); break; case SPRN_TCR: + /* + * WRC is a 2-bit field that is supposed to preserve its + * value once written to non-zero. + */ + if (vcpu->arch.tcr & TCR_WRC_MASK) { + spr_val &= ~TCR_WRC_MASK; + spr_val |= vcpu->arch.tcr & TCR_WRC_MASK; + } kvmppc_set_tcr(vcpu, spr_val); break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 54b12af577d0..0ffd7d17adc7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -300,6 +300,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: + case KVM_CAP_PPC_BOOKE_WATCHDOG: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -476,6 +477,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + int ret; + hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; @@ -484,13 +487,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); #endif - - return 0; + ret = kvmppc_subarch_vcpu_init(vcpu); + return ret; } void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -735,6 +739,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_WATCHDOG: + r = 0; + vcpu->arch.watchdog_enabled = true; + break; +#endif #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4cb3761bebae..1649d4b57e1f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -167,6 +167,7 @@ struct kvm_pit_config { #define KVM_EXIT_OSI 18 #define KVM_EXIT_PAPR_HCALL 19 #define KVM_EXIT_S390_UCONTROL 20 +#define KVM_EXIT_WATCHDOG 21 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -628,6 +629,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_READONLY_MEM 81 #endif #define KVM_CAP_IRQFD_RESAMPLE 82 +#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2850656e2e96..0ca3663206f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -118,6 +118,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_IMMEDIATE_EXIT 15 #define KVM_REQ_PMU 16 #define KVM_REQ_PMI 17 +#define KVM_REQ_WATCHDOG 18 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3-59-g8ed1b From 6df8d3fc58dde84fc82a9ec2581440e54dfd3d14 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 21:17:55 +0000 Subject: booke: Added ONE_REG interface for IAC/DAC debug registers IAC/DAC are defined as 32 bit while they are 64 bit wide. So ONE_REG interface is added to set/get them. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm.h | 12 ++++++++++ arch/powerpc/include/asm/kvm_host.h | 24 +++++++++++++++++-- arch/powerpc/kvm/booke.c | 48 +++++++++++++++++++++++++++++++++++-- arch/powerpc/kvm/booke_emulate.c | 8 +++---- 4 files changed, 84 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 1bea4d8ea6f4..3c14202a3c84 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -221,6 +221,12 @@ struct kvm_sregs { __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */ __u32 dbcr[3]; + /* + * iac/dac registers are 64bit wide, while this API + * interface provides only lower 32 bits on 64 bit + * processors. ONE_REG interface is added for 64bit + * iac/dac registers. + */ __u32 iac[4]; __u32 dac[2]; __u32 dvc[2]; @@ -326,5 +332,11 @@ struct kvm_book3e_206_tlb_params { }; #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) +#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) +#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) +#define KVM_REG_PPC_IAC3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4) +#define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5) +#define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6) +#define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 51b0ccd56769..f20a5ef1c7e8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -346,6 +346,27 @@ struct kvmppc_slb { bool class : 1; }; +# ifdef CONFIG_PPC_FSL_BOOK3E +#define KVMPPC_BOOKE_IAC_NUM 2 +#define KVMPPC_BOOKE_DAC_NUM 2 +# else +#define KVMPPC_BOOKE_IAC_NUM 4 +#define KVMPPC_BOOKE_DAC_NUM 2 +# endif +#define KVMPPC_BOOKE_MAX_IAC 4 +#define KVMPPC_BOOKE_MAX_DAC 2 + +struct kvmppc_booke_debug_reg { + u32 dbcr0; + u32 dbcr1; + u32 dbcr2; +#ifdef CONFIG_KVM_E500MC + u32 dbcr4; +#endif + u64 iac[KVMPPC_BOOKE_MAX_IAC]; + u64 dac[KVMPPC_BOOKE_MAX_DAC]; +}; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -440,8 +461,6 @@ struct kvm_vcpu_arch { u32 ccr0; u32 ccr1; - u32 dbcr0; - u32 dbcr1; u32 dbsr; u64 mmcr[3]; @@ -476,6 +495,7 @@ struct kvm_vcpu_arch { u32 tlbcfg[4]; u32 mmucfg; u32 epr; + struct kvmppc_booke_debug_reg dbg_reg; #endif gpa_t paddr_accessed; gva_t vaddr_accessed; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 09e8bf33a8c9..959aae96469c 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1351,12 +1351,56 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - return -EINVAL; + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_IAC1: + case KVM_REG_PPC_IAC2: + case KVM_REG_PPC_IAC3: + case KVM_REG_PPC_IAC4: { + int iac = reg->id - KVM_REG_PPC_IAC1; + r = copy_to_user((u64 __user *)(long)reg->addr, + &vcpu->arch.dbg_reg.iac[iac], sizeof(u64)); + break; + } + case KVM_REG_PPC_DAC1: + case KVM_REG_PPC_DAC2: { + int dac = reg->id - KVM_REG_PPC_DAC1; + r = copy_to_user((u64 __user *)(long)reg->addr, + &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); + break; + } + default: + break; + } + return r; } int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { - return -EINVAL; + int r = -EINVAL; + + switch (reg->id) { + case KVM_REG_PPC_IAC1: + case KVM_REG_PPC_IAC2: + case KVM_REG_PPC_IAC3: + case KVM_REG_PPC_IAC4: { + int iac = reg->id - KVM_REG_PPC_IAC1; + r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac], + (u64 __user *)(long)reg->addr, sizeof(u64)); + break; + } + case KVM_REG_PPC_DAC1: + case KVM_REG_PPC_DAC2: { + int dac = reg->id - KVM_REG_PPC_DAC1; + r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac], + (u64 __user *)(long)reg->addr, sizeof(u64)); + break; + } + default: + break; + } + return r; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 5a66ade7fd17..cc99a0b3d202 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) vcpu->arch.csrr1 = spr_val; break; case SPRN_DBCR0: - vcpu->arch.dbcr0 = spr_val; + vcpu->arch.dbg_reg.dbcr0 = spr_val; break; case SPRN_DBCR1: - vcpu->arch.dbcr1 = spr_val; + vcpu->arch.dbg_reg.dbcr1 = spr_val; break; case SPRN_DBSR: vcpu->arch.dbsr &= ~spr_val; @@ -266,10 +266,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) *spr_val = vcpu->arch.csrr1; break; case SPRN_DBCR0: - *spr_val = vcpu->arch.dbcr0; + *spr_val = vcpu->arch.dbg_reg.dbcr0; break; case SPRN_DBCR1: - *spr_val = vcpu->arch.dbcr1; + *spr_val = vcpu->arch.dbg_reg.dbcr1; break; case SPRN_DBSR: *spr_val = vcpu->arch.dbsr; -- cgit v1.2.3-59-g8ed1b From 491dd5b8a4926393308172da80c73faf242a4057 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 14:40:29 +0200 Subject: KVM: PPC: 44x: Initialize PVR We need to make sure that vcpu->arch.pvr is initialized to a sane value, so let's just take the host PVR. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 50e7dbc7356c..3d7fd21c65f9 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) vcpu_44x->shadow_refs[i].gtlb_index = -1; vcpu->arch.cpu_type = KVM_CPU_440; + vcpu->arch.pvr = mfspr(SPRN_PVR); return 0; } -- cgit v1.2.3-59-g8ed1b From 50c871edf59b4585fd2c17acfe4e7cd3752418b7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 13 Aug 2012 14:50:54 +0200 Subject: KVM: PPC: BookE: Add MCSR SPR support Add support for the MCSR SPR. This only implements the SPR storage bits, not actual machine checks. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke_emulate.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index cc99a0b3d202..514790f41aba 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -237,6 +237,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_IVOR15: vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val; break; + case SPRN_MCSR: + vcpu->arch.mcsr &= ~spr_val; + break; default: emulated = EMULATE_FAIL; @@ -329,6 +332,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_IVOR15: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]; break; + case SPRN_MCSR: + *spr_val = vcpu->arch.mcsr; + break; default: emulated = EMULATE_FAIL; -- cgit v1.2.3-59-g8ed1b From 166a2b7000c388aee81168987ce2eddb6783f550 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Aug 2012 01:38:43 +0200 Subject: KVM: PPC: Use symbols for exit trace Exit traces are a lot easier to read when you don't have to remember cryptic numbers for guest exit reasons. Symbolify them in our trace output. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/trace.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index cb2780a42fd8..519aba8bb3d3 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -31,6 +31,60 @@ TRACE_EVENT(kvm_ppc_instr, __entry->inst, __entry->pc, __entry->emulate) ); +#ifdef CONFIG_PPC_BOOK3S +#define kvm_trace_symbol_exit \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} +#else +#define kvm_trace_symbol_exit \ + {0, "CRITICAL"}, \ + {1, "MACHINE_CHECK"}, \ + {2, "DATA_STORAGE"}, \ + {3, "INST_STORAGE"}, \ + {4, "EXTERNAL"}, \ + {5, "ALIGNMENT"}, \ + {6, "PROGRAM"}, \ + {7, "FP_UNAVAIL"}, \ + {8, "SYSCALL"}, \ + {9, "AP_UNAVAIL"}, \ + {10, "DECREMENTER"}, \ + {11, "FIT"}, \ + {12, "WATCHDOG"}, \ + {13, "DTLB_MISS"}, \ + {14, "ITLB_MISS"}, \ + {15, "DEBUG"}, \ + {32, "SPE_UNAVAIL"}, \ + {33, "SPE_FP_DATA"}, \ + {34, "SPE_FP_ROUND"}, \ + {35, "PERFORMANCE_MONITOR"}, \ + {36, "DOORBELL"}, \ + {37, "DOORBELL_CRITICAL"}, \ + {38, "GUEST_DBELL"}, \ + {39, "GUEST_DBELL_CRIT"}, \ + {40, "HV_SYSCALL"}, \ + {41, "HV_PRIV"} +#endif + TRACE_EVENT(kvm_exit, TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), TP_ARGS(exit_nr, vcpu), @@ -62,7 +116,7 @@ TRACE_EVENT(kvm_exit, __entry->last_inst = vcpu->arch.last_inst; ), - TP_printk("exit=0x%x" + TP_printk("exit=%s" " | pc=0x%lx" " | msr=0x%lx" " | dar=0x%lx" @@ -71,7 +125,7 @@ TRACE_EVENT(kvm_exit, #endif " | last_inst=0x%lx" , - __entry->exit_nr, + __print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit), __entry->pc, __entry->msr, __entry->dar, -- cgit v1.2.3-59-g8ed1b From 430c7ff52ffb902e1e08b255b93c28fcad8cb9ef Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 15 Aug 2012 11:42:07 +0200 Subject: KVM: PPC: E500: Remove E500_TLB_DIRTY flag Since we always mark pages as dirty immediately when mapping them read/write now, there's no need for the dirty flag in our cache. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500.h | 3 +-- arch/powerpc/kvm/e500_tlb.c | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index aa8b81428bf4..d1622864549e 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -27,8 +27,7 @@ #define E500_TLB_NUM 2 #define E500_TLB_VALID 1 -#define E500_TLB_DIRTY 2 -#define E500_TLB_BITMAP 4 +#define E500_TLB_BITMAP 2 struct tlbe_ref { pfn_t pfn; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 1af6fab58995..43489a8fa985 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -303,10 +303,8 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, ref->pfn = pfn; ref->flags = E500_TLB_VALID; - if (tlbe_is_writable(gtlbe)) { - ref->flags |= E500_TLB_DIRTY; + if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); - } } static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) -- cgit v1.2.3-59-g8ed1b From e4dcfe88fb30bcedda80c151018086fffb8280e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 00:28:09 +0200 Subject: KVM: PPC: 440: Implement mtdcrx We need mtdcrx to execute properly on 460 cores. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_emulate.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index c8c61578fdfc..3843a75e3e98 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -28,11 +28,29 @@ #include "44x_tlb.h" #define XOP_MFDCR 323 +#define XOP_MTDCRX 387 #define XOP_MTDCR 451 #define XOP_TLBSX 914 #define XOP_ICCCI 966 #define XOP_TLBWE 978 +static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) +{ + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); + return EMULATE_DONE; + default: + vcpu->run->dcr.dcrn = dcrn; + vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); + vcpu->run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + return EMULATE_DO_DCR; + } +} + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -85,20 +103,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_MTDCR: - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = kvmppc_get_gpr(vcpu, rs); - run->dcr.is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } + emulated = emulate_mtdcr(vcpu, rs, dcrn); + break; + case XOP_MTDCRX: + emulated = emulate_mtdcr(vcpu, rs, + kvmppc_get_gpr(vcpu, ra)); break; case XOP_TLBWE: -- cgit v1.2.3-59-g8ed1b From ceb985f9d18cba2efdef08b8d31751c2c2b20d77 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 00:34:58 +0200 Subject: KVM: PPC: 440: Implement mfdcrx We need mfdcrx to execute properly on 460 cores. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_emulate.c | 74 ++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 31 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 3843a75e3e98..1a793c4c4a67 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -27,6 +27,7 @@ #include "booke.h" #include "44x_tlb.h" +#define XOP_MFDCRX 259 #define XOP_MFDCR 323 #define XOP_MTDCRX 387 #define XOP_MTDCR 451 @@ -51,6 +52,43 @@ static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) } } +static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) +{ + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + kvmppc_set_gpr(vcpu, rt, + mfdcr(DCRN_CPR0_CONFIG_DATA)); + local_irq_enable(); + break; + default: + vcpu->run->dcr.dcrn = dcrn; + vcpu->run->dcr.data = 0; + vcpu->run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + kvmppc_account_exit(vcpu, DCR_EXITS); + return EMULATE_DO_DCR; + } + + return EMULATE_DONE; +} + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -68,38 +106,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case XOP_MFDCR: - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - run->dcr.dcrn = dcrn; - run->dcr.data = 0; - run->dcr.is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - emulated = EMULATE_DO_DCR; - } + emulated = emulate_mfdcr(vcpu, rt, dcrn); + break; + case XOP_MFDCRX: + emulated = emulate_mfdcr(vcpu, rt, + kvmppc_get_gpr(vcpu, ra)); break; case XOP_MTDCR: -- cgit v1.2.3-59-g8ed1b From 7a08c2740f07fb8c3769d1f137721835ead7652f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 16 Aug 2012 13:10:16 +0200 Subject: KVM: PPC: BookE: Support FPU on non-hv systems When running on HV aware hosts, we can not trap when the guest sets the FP bit, so we just let it do so when it wants to, because it has full access to MSR. For non-HV aware hosts with an FPU (like 440), we need to also adjust the shadow MSR though. Otherwise the guest gets an FP unavailable trap even when it really enabled the FP bit in MSR. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 959aae96469c..5f0476a602d8 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -122,6 +122,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu) } #endif +static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu) +{ +#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV) + /* We always treat the FP bit as enabled from the host + perspective, so only need to adjust the shadow MSR */ + vcpu->arch.shadow_msr &= ~MSR_FP; + vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP; +#endif +} + /* * Helper function for "full" MSR writes. No need to call this if only * EE/CE/ME/DE/RI are changing. @@ -138,6 +148,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) kvmppc_mmu_msr_notify(vcpu, old_msr); kvmppc_vcpu_sync_spe(vcpu); + kvmppc_vcpu_sync_fpu(vcpu); } static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, -- cgit v1.2.3-59-g8ed1b From d61966fc08b84857b697ebae4489c652dd87e48a Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 12 Sep 2012 03:18:14 +0000 Subject: KVM: PPC: bookehv: Allow duplicate calls of DO_KVM macro The current form of DO_KVM macro restricts its use to one call per input parameter set. This is caused by kvmppc_resume_\intno\()_\srr1 symbol definition. Duplicate calls of DO_KVM are required by distinct implementations of exeption handlers which are delegated at runtime. Use a rare label number to avoid conflicts with the calling contexts. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_booke_hv_asm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h index 30a600fa1b6a..a37a12a9a7d7 100644 --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -38,9 +38,9 @@ #ifdef CONFIG_KVM_BOOKE_HV BEGIN_FTR_SECTION mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ - bf 3, kvmppc_resume_\intno\()_\srr1 + bf 3, 1975f b kvmppc_handler_\intno\()_\srr1 -kvmppc_resume_\intno\()_\srr1: +1975: END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) #endif .endm -- cgit v1.2.3-59-g8ed1b From 2c9097e4c1340208ef93371abd4b3bd7e989381b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:27:01 +0000 Subject: KVM: PPC: Book3S HV: Take the SRCU read lock before looking up memslots The generic KVM code uses SRCU (sleeping RCU) to protect accesses to the memslots data structures against updates due to userspace adding, modifying or removing memory slots. We need to do that too, both to avoid accessing stale copies of the memslots and to avoid lockdep warnings. This therefore adds srcu_read_lock/unlock pairs around code that accesses and uses memslots. Since the real-mode handlers for H_ENTER, H_REMOVE and H_BULK_REMOVE need to access the memslots, and we don't want to call the SRCU code in real mode (since we have no assurance that it would only access the linear mapping), we hold the SRCU read lock for the VM while in the guest. This does mean that adding or removing memory slots while some vcpus are executing in the guest will block for up to two jiffies. This tradeoff is acceptable since adding/removing memory slots only happens rarely, while H_ENTER/H_REMOVE/H_BULK_REMOVE are performance-critical hot paths. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 17 +++++++++++++---- arch/powerpc/kvm/book3s_hv.c | 27 +++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index d95d11322a15..0f031c07f7e5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1057,20 +1058,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, unsigned long hva, psize, offset; unsigned long pa; unsigned long *physp; + int srcu_idx; + srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - return NULL; + goto err; if (!kvm->arch.using_mmu_notifiers) { physp = kvm->arch.slot_phys[memslot->id]; if (!physp) - return NULL; + goto err; physp += gfn - memslot->base_gfn; pa = *physp; if (!pa) { if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0) - return NULL; + goto err; pa = *physp; } page = pfn_to_page(pa >> PAGE_SHIFT); @@ -1079,9 +1082,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, hva = gfn_to_hva_memslot(memslot, gfn); npages = get_user_pages_fast(hva, 1, 1, pages); if (npages < 1) - return NULL; + goto err; page = pages[0]; } + srcu_read_unlock(&kvm->srcu, srcu_idx); + psize = PAGE_SIZE; if (PageHuge(page)) { page = compound_head(page); @@ -1091,6 +1096,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (nb_ret) *nb_ret = psize - offset; return page_address(page) + offset; + + err: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return NULL; } void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 83e929e66f9d..48b0d4a73b9d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -366,13 +367,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) unsigned long req = kvmppc_get_gpr(vcpu, 3); unsigned long target, ret = H_SUCCESS; struct kvm_vcpu *tvcpu; + int idx; switch (req) { case H_ENTER: + idx = srcu_read_lock(&vcpu->kvm->srcu); ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6), kvmppc_get_gpr(vcpu, 7)); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case H_CEDE: break; @@ -411,6 +415,7 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { int r = RESUME_HOST; + int srcu_idx; vcpu->stat.sum_exits++; @@ -470,12 +475,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * have been handled already. */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_book3s_hv_page_fault(run, vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); r = kvmppc_book3s_hv_page_fault(run, vcpu, kvmppc_get_pc(vcpu), 0); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; /* * This occurs if the guest executes an illegal instruction. @@ -820,6 +829,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) long ret; u64 now; int ptid, i, need_vpa_update; + int srcu_idx; /* don't start if any threads have a signal pending */ need_vpa_update = 0; @@ -898,6 +908,9 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) spin_unlock(&vc->lock); kvm_guest_enter(); + + srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu); + __kvmppc_vcore_entry(NULL, vcpu0); for (i = 0; i < threads_per_core; ++i) kvmppc_release_hwthread(vc->pcpu + i); @@ -913,6 +926,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vc->vcore_state = VCORE_EXITING; spin_unlock(&vc->lock); + srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx); + /* make sure updates to secondary vcpu structs are visible now */ smp_mb(); kvm_guest_exit(); @@ -1362,6 +1377,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) unsigned long rmls; unsigned long *physp; unsigned long i, npages; + int srcu_idx; mutex_lock(&kvm->lock); if (kvm->arch.rma_setup_done) @@ -1377,12 +1393,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) } /* Look up the memslot for guest physical address 0 */ + srcu_idx = srcu_read_lock(&kvm->srcu); memslot = gfn_to_memslot(kvm, 0); /* We must have some memory at 0 by now */ err = -EINVAL; if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) - goto out; + goto out_srcu; /* Look up the VMA for the start of this memory slot */ hva = memslot->userspace_addr; @@ -1406,14 +1423,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) err = -EPERM; if (cpu_has_feature(CPU_FTR_ARCH_201)) { pr_err("KVM: CPU requires an RMO\n"); - goto out; + goto out_srcu; } /* We can handle 4k, 64k or 16M pages in the VRMA */ err = -EINVAL; if (!(psize == 0x1000 || psize == 0x10000 || psize == 0x1000000)) - goto out; + goto out_srcu; /* Update VRMASD field in the LPCR */ senc = slb_pgsize_encoding(psize); @@ -1436,7 +1453,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) err = -EINVAL; if (rmls < 0) { pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size); - goto out; + goto out_srcu; } atomic_inc(&ri->use_count); kvm->arch.rma = ri; @@ -1476,6 +1493,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) smp_wmb(); kvm->arch.rma_setup_done = 1; err = 0; + out_srcu: + srcu_read_unlock(&kvm->srcu, srcu_idx); out: mutex_unlock(&kvm->lock); return err; -- cgit v1.2.3-59-g8ed1b From a66b48c3a39fa1c4223d4f847fdc7a04ed1618de Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:27:46 +0000 Subject: KVM: PPC: Move kvm->arch.slot_phys into memslot.arch Now that we have an architecture-specific field in the kvm_memory_slot structure, we can use it to store the array of page physical addresses that we need for Book3S HV KVM on PPC970 processors. This reduces the size of struct kvm_arch for Book3S HV, and also reduces the size of struct kvm_arch_memory_slot for other PPC KVM variants since the fields in it are now only compiled in for Book3S HV. This necessitates making the kvm_arch_create_memslot and kvm_arch_free_memslot operations specific to each PPC KVM variant. That in turn means that we now don't allocate the rmap arrays on Book3S PR and Book E. Since we now unpin pages and free the slot_phys array in kvmppc_core_free_memslot, we no longer need to do it in kvmppc_core_destroy_vm, since the generic code takes care to free all the memslots when destroying a VM. We now need the new memslot to be passed in to kvmppc_core_prepare_memory_region, since we need to initialize its arch.slot_phys member on Book3S HV. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 9 ++-- arch/powerpc/include/asm/kvm_ppc.h | 5 ++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +-- arch/powerpc/kvm/book3s_hv.c | 104 +++++++++++++++++++++--------------- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 12 +++++ arch/powerpc/kvm/booke.c | 12 +++++ arch/powerpc/kvm/powerpc.c | 13 ++--- 8 files changed, 102 insertions(+), 61 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f20a5ef1c7e8..68f5a308737a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -204,7 +204,7 @@ struct revmap_entry { }; /* - * We use the top bit of each memslot->rmap entry as a lock bit, + * We use the top bit of each memslot->arch.rmap entry as a lock bit, * and bit 32 as a present flag. The bottom 32 bits are the * index in the guest HPT of a HPTE that points to the page. */ @@ -215,14 +215,17 @@ struct revmap_entry { #define KVMPPC_RMAP_PRESENT 0x100000000ul #define KVMPPC_RMAP_INDEX 0xfffffffful -/* Low-order bits in kvm->arch.slot_phys[][] */ +/* Low-order bits in memslot->arch.slot_phys[] */ #define KVMPPC_PAGE_ORDER_MASK 0x1f #define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */ #define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ #define KVMPPC_GOT_PAGE 0x80 struct kvm_arch_memory_slot { +#ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long *rmap; + unsigned long *slot_phys; +#endif /* CONFIG_KVM_BOOK3S_64_HV */ }; struct kvm_arch { @@ -246,8 +249,6 @@ struct kvm_arch { unsigned long hpt_npte; unsigned long hpt_mask; spinlock_t slot_phys_lock; - unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; - int slot_npages[KVM_MEM_SLOTS_NUM]; unsigned short last_vcpu[NR_CPUS]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_linear_info *hpt_li; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c06a64b53362..41a00eae68c7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -143,7 +143,12 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void); extern void kvm_release_hpt(struct kvmppc_linear_info *li); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); +extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); +extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 0f031c07f7e5..a389cc62b16c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -261,7 +261,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) /* * This is called to get a reference to a guest page if there isn't - * one already in the kvm->arch.slot_phys[][] arrays. + * one already in the memslot->arch.slot_phys[] array. */ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, struct kvm_memory_slot *memslot, @@ -276,7 +276,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, struct vm_area_struct *vma; unsigned long pfn, i, npages; - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) return -EINVAL; if (physp[gfn - memslot->base_gfn]) @@ -1065,7 +1065,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; if (!kvm->arch.using_mmu_notifiers) { - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) goto err; physp += gfn - memslot->base_gfn; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 48b0d4a73b9d..817837de7362 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1314,48 +1314,67 @@ static unsigned long slb_pgsize_encoding(unsigned long psize) return senc; } -int kvmppc_core_prepare_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) +static void unpin_slot(struct kvm_memory_slot *memslot) { - unsigned long npages; - unsigned long *phys; + unsigned long *physp; + unsigned long j, npages, pfn; + struct page *page; - /* Allocate a slot_phys array */ - phys = kvm->arch.slot_phys[mem->slot]; - if (!kvm->arch.using_mmu_notifiers && !phys) { - npages = mem->memory_size >> PAGE_SHIFT; - phys = vzalloc(npages * sizeof(unsigned long)); - if (!phys) - return -ENOMEM; - kvm->arch.slot_phys[mem->slot] = phys; - kvm->arch.slot_npages[mem->slot] = npages; + physp = memslot->arch.slot_phys; + npages = memslot->npages; + if (!physp) + return; + for (j = 0; j < npages; j++) { + if (!(physp[j] & KVMPPC_GOT_PAGE)) + continue; + pfn = physp[j] >> PAGE_SHIFT; + page = pfn_to_page(pfn); + SetPageDirty(page); + put_page(page); + } +} + +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + if (!dont || free->arch.rmap != dont->arch.rmap) { + vfree(free->arch.rmap); + free->arch.rmap = NULL; } + if (!dont || free->arch.slot_phys != dont->arch.slot_phys) { + unpin_slot(free); + vfree(free->arch.slot_phys); + free->arch.slot_phys = NULL; + } +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); + if (!slot->arch.rmap) + return -ENOMEM; + slot->arch.slot_phys = NULL; return 0; } -static void unpin_slot(struct kvm *kvm, int slot_id) +int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + struct kvm_userspace_memory_region *mem) { - unsigned long *physp; - unsigned long j, npages, pfn; - struct page *page; + unsigned long *phys; - physp = kvm->arch.slot_phys[slot_id]; - npages = kvm->arch.slot_npages[slot_id]; - if (physp) { - spin_lock(&kvm->arch.slot_phys_lock); - for (j = 0; j < npages; j++) { - if (!(physp[j] & KVMPPC_GOT_PAGE)) - continue; - pfn = physp[j] >> PAGE_SHIFT; - page = pfn_to_page(pfn); - SetPageDirty(page); - put_page(page); - } - kvm->arch.slot_phys[slot_id] = NULL; - spin_unlock(&kvm->arch.slot_phys_lock); - vfree(physp); + /* Allocate a slot_phys array if needed */ + phys = memslot->arch.slot_phys; + if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) { + phys = vzalloc(memslot->npages * sizeof(unsigned long)); + if (!phys) + return -ENOMEM; + memslot->arch.slot_phys = phys; } + + return 0; } void kvmppc_core_commit_memory_region(struct kvm *kvm, @@ -1482,11 +1501,16 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) /* Initialize phys addrs of pages in RMO */ npages = ri->npages; porder = __ilog2(npages); - physp = kvm->arch.slot_phys[memslot->id]; - spin_lock(&kvm->arch.slot_phys_lock); - for (i = 0; i < npages; ++i) - physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder; - spin_unlock(&kvm->arch.slot_phys_lock); + physp = memslot->arch.slot_phys; + if (physp) { + if (npages > memslot->npages) + npages = memslot->npages; + spin_lock(&kvm->arch.slot_phys_lock); + for (i = 0; i < npages; ++i) + physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + + porder; + spin_unlock(&kvm->arch.slot_phys_lock); + } } /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */ @@ -1547,12 +1571,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) void kvmppc_core_destroy_vm(struct kvm *kvm) { - unsigned long i; - - if (!kvm->arch.using_mmu_notifiers) - for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) - unpin_slot(kvm, i); - if (kvm->arch.rma) { kvm_release_rma(kvm->arch.rma); kvm->arch.rma = NULL; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fb0e821622d4..63eb94e63cc3 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -183,7 +183,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, rmap = &memslot->arch.rmap[slot_fn]; if (!kvm->arch.using_mmu_notifiers) { - physp = kvm->arch.slot_phys[memslot->id]; + physp = memslot->arch.slot_phys; if (!physp) return H_PARAMETER; physp += slot_fn; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b3c584f94cb3..fdadc9e57da2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1220,7 +1220,19 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info) } #endif /* CONFIG_PPC64 */ +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { return 0; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 5f0476a602d8..514405752988 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1438,7 +1438,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } +void kvmppc_core_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, + unsigned long npages) +{ + return 0; +} + int kvmppc_core_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0ffd7d17adc7..33122dd89da9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -389,19 +389,12 @@ long kvm_arch_dev_ioctl(struct file *filp, void kvm_arch_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - if (!dont || free->arch.rmap != dont->arch.rmap) { - vfree(free->arch.rmap); - free->arch.rmap = NULL; - } + kvmppc_core_free_memslot(free, dont); } int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) { - slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap)); - if (!slot->arch.rmap) - return -ENOMEM; - - return 0; + return kvmppc_core_create_memslot(slot, npages); } int kvm_arch_prepare_memory_region(struct kvm *kvm, @@ -410,7 +403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc) { - return kvmppc_core_prepare_memory_region(kvm, mem); + return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } void kvm_arch_commit_memory_region(struct kvm *kvm, -- cgit v1.2.3-59-g8ed1b From dfe49dbd1fc7310a4e0e2f83ae737cd7d34fa0cd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 11 Sep 2012 13:28:18 +0000 Subject: KVM: PPC: Book3S HV: Handle memory slot deletion and modification correctly This adds an implementation of kvm_arch_flush_shadow_memslot for Book3S HV, and arranges for kvmppc_core_commit_memory_region to flush the dirty log when modifying an existing slot. With this, we can handle deletion and modification of memory slots. kvm_arch_flush_shadow_memslot calls kvmppc_core_flush_memslot, which on Book3S HV now traverses the reverse map chains to remove any HPT (hashed page table) entries referring to pages in the memslot. This gets called by generic code whenever deleting a memslot or changing the guest physical address for a memslot. We flush the dirty log in kvmppc_core_commit_memory_region for consistency with what x86 does. We only need to flush when an existing memslot is being modified, because for a new memslot the rmap array (which stores the dirty bits) is all zero, meaning that every page is considered clean already, and when deleting a memslot we obviously don't care about the dirty bits any more. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 2 +- arch/powerpc/include/asm/kvm_ppc.h | 5 ++++- arch/powerpc/kvm/book3s_64_mmu_hv.c | 33 ++++++++++++++++++++++++++++----- arch/powerpc/kvm/book3s_hv.c | 18 ++++++++++++++++-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/powerpc/kvm/book3s_pr.c | 7 ++++++- arch/powerpc/kvm/booke.c | 7 ++++++- arch/powerpc/kvm/powerpc.c | 3 ++- 8 files changed, 64 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f0e0c6a66d97..ab738005d2ea 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -160,7 +160,7 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, - struct kvm_memory_slot *memslot); + struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 41a00eae68c7..3fb980d293e5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -151,9 +151,12 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem); extern void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); +extern void kvmppc_core_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot); extern int kvmppc_bookehv_init(void); extern void kvmppc_bookehv_exit(void); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index a389cc62b16c..f598366e51c6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -851,7 +851,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, psize = hpte_page_size(hptep[0], ptel); if ((hptep[0] & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { - hptep[0] |= HPTE_V_ABSENT; + if (kvm->arch.using_mmu_notifiers) + hptep[0] |= HPTE_V_ABSENT; kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); @@ -878,6 +879,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) return 0; } +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + unsigned long *rmapp; + unsigned long gfn; + unsigned long n; + + rmapp = memslot->arch.rmap; + gfn = memslot->base_gfn; + for (n = memslot->npages; n; --n) { + /* + * Testing the present bit without locking is OK because + * the memslot has been marked invalid already, and hence + * no new HPTEs referencing this page can be created, + * thus the present bit can't go from 0 to 1. + */ + if (*rmapp & KVMPPC_RMAP_PRESENT) + kvm_unmap_rmapp(kvm, rmapp, gfn); + ++rmapp; + ++gfn; + } +} + static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, unsigned long gfn) { @@ -1031,16 +1054,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) return ret; } -long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, + unsigned long *map) { unsigned long i; - unsigned long *rmapp, *map; + unsigned long *rmapp; preempt_disable(); rmapp = memslot->arch.rmap; - map = memslot->dirty_bitmap; for (i = 0; i < memslot->npages; ++i) { - if (kvm_test_clear_dirty(kvm, rmapp)) + if (kvm_test_clear_dirty(kvm, rmapp) && map) __set_bit_le(i, map); ++rmapp; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 817837de7362..38c7f1bc3495 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1288,7 +1288,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); - r = kvmppc_hv_get_dirty_log(kvm, memslot); + r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap); if (r) goto out; @@ -1378,8 +1378,22 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) { + unsigned long npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot; + + if (npages && old.npages) { + /* + * If modifying a memslot, reset all the rmap dirty bits. + * If this is a new memslot, we don't need to do anything + * since the rmap array starts out as all zeroes, + * i.e. no pages are dirty. + */ + memslot = id_to_memslot(kvm->memslots, mem->slot); + kvmppc_hv_get_dirty_log(kvm, memslot, NULL); + } } static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 63eb94e63cc3..9955216477a4 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, ptel = rev->guest_rpte |= rcbits; gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) + if (!memslot) return; rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index fdadc9e57da2..4d0667a810a4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1239,7 +1239,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) +{ +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 514405752988..3a6490fc6fcd 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1457,7 +1457,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, } void kvmppc_core_commit_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old) +{ +} + +void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 33122dd89da9..8443e23f3605 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -411,7 +411,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot old, int user_alloc) { - kvmppc_core_commit_memory_region(kvm, mem); + kvmppc_core_commit_memory_region(kvm, mem, old); } void kvm_arch_flush_shadow_all(struct kvm *kvm) @@ -421,6 +421,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) { + kvmppc_core_flush_memslot(kvm, slot); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) -- cgit v1.2.3-59-g8ed1b From a47d72f3613d5edfd8e752c9b804d7df35810649 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:35:51 +0000 Subject: KVM: PPC: Book3S HV: Fix updates of vcpu->cpu This removes the powerpc "generic" updates of vcpu->cpu in load and put, and moves them to the various backends. The reason is that "HV" KVM does its own sauce with that field and the generic updates might corrupt it. The field contains the CPU# of the -first- HW CPU of the core always for all the VCPU threads of a core (the one that's online from a host Linux perspective). However, the preempt notifiers are going to be called on the threads VCPUs when they are running (due to them sleeping on our private waitqueue) causing unload to be called, potentially clobbering the value. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 3 ++- arch/powerpc/kvm/booke.c | 2 ++ arch/powerpc/kvm/powerpc.c | 2 -- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 4d0667a810a4..bf3ec5d66d8c 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -64,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; svcpu_put(svcpu); #endif - + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu; #endif @@ -84,6 +84,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP); kvmppc_giveup_ext(vcpu, MSR_VEC); kvmppc_giveup_ext(vcpu, MSR_VSX); + vcpu->cpu = -1; } int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3a6490fc6fcd..69d047c22d20 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1509,12 +1509,14 @@ void kvmppc_decrementer_func(unsigned long data) void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + vcpu->cpu = smp_processor_id(); current->thread.kvm_vcpu = vcpu; } void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) { current->thread.kvm_vcpu = NULL; + vcpu->cpu = -1; } int __init kvmppc_booke_init(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8443e23f3605..6002ea938a48 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -504,7 +504,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_VRSAVE, vcpu->arch.vrsave); #endif kvmppc_core_vcpu_load(vcpu, cpu); - vcpu->cpu = smp_processor_id(); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -513,7 +512,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #ifdef CONFIG_BOOKE vcpu->arch.vrsave = mfspr(SPRN_VRSAVE); #endif - vcpu->cpu = -1; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, -- cgit v1.2.3-59-g8ed1b From 964ee98ccde0534548565a201827cf06d813180f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:36:32 +0000 Subject: KVM: PPC: Book3S HV: Remove bogus update of physical thread IDs When making a vcpu non-runnable we incorrectly changed the thread IDs of all other threads on the core, just remove that code. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 38c7f1bc3495..c9ae3148c981 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -706,17 +706,11 @@ extern void xics_wake_cpu(int cpu); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) { - struct kvm_vcpu *v; - if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; --vc->n_runnable; ++vc->n_busy; - /* decrement the physical thread id of each following vcpu */ - v = vcpu; - list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list) - --v->arch.ptid; list_del(&vcpu->arch.run_list); } -- cgit v1.2.3-59-g8ed1b From 70bddfefbdcdbfdebd81d8b59ff8a7fa5d450ccc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 20 Sep 2012 19:39:21 +0000 Subject: KVM: PPC: Book3S HV: Fix calculation of guest phys address for MMIO emulation In the case where the host kernel is using a 64kB base page size and the guest uses a 4k HPTE (hashed page table entry) to map an emulated MMIO device, we were calculating the guest physical address wrongly. We were calculating a gfn as the guest physical address shifted right 16 bits (PAGE_SHIFT) but then only adding back in 12 bits from the effective address, since the HPTE had a 4k page size. Thus the gpa reported to userspace was missing 4 bits. Instead, we now compute the guest physical address from the HPTE without reference to the host page size, and then compute the gfn by shifting the gpa right PAGE_SHIFT bits. Reported-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index f598366e51c6..7a4aae99ac5b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -571,7 +571,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct kvm *kvm = vcpu->kvm; unsigned long *hptep, hpte[3], r; unsigned long mmu_seq, psize, pte_size; - unsigned long gfn, hva, pfn; + unsigned long gpa, gfn, hva, pfn; struct kvm_memory_slot *memslot; unsigned long *rmap; struct revmap_entry *rev; @@ -609,15 +609,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Translate the logical address and get the page */ psize = hpte_page_size(hpte[0], r); - gfn = hpte_rpn(r, psize); + gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1)); + gfn = gpa >> PAGE_SHIFT; memslot = gfn_to_memslot(kvm, gfn); /* No memslot means it's an emulated MMIO region */ - if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { - unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); + if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, dsisr & DSISR_ISSTORE); - } if (!kvm->arch.using_mmu_notifiers) return -EFAULT; /* should never get here */ -- cgit v1.2.3-59-g8ed1b From e400e72f250d2567e89c9bafb47ab91e8d9a15a2 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:04:23 +0000 Subject: KVM: PPC: e500: fix allocation size error on g2h_tlb1_map We were only allocating half the bytes we need, which was made more obvious by a recent fix to the memset in clear_tlb1_bitmap(). Signed-off-by: Scott Wood Signed-off-by: Alexander Graf Cc: stable@vger.kernel.org --- arch/powerpc/kvm/e500_tlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 43489a8fa985..a27d134eef36 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1385,7 +1385,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->gtlb_priv[1]) goto err; - vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) * + vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) * vcpu_e500->gtlb_params[1].entries, GFP_KERNEL); if (!vcpu_e500->g2h_tlb1_map) -- cgit v1.2.3-59-g8ed1b From adbb48a854bf8dee556dc42b96dd61503351a82d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:04:24 +0000 Subject: KVM: PPC: e500: MMU API: fix leak of shared_tlb_pages This was found by kmemleak. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index a27d134eef36..641f97847b95 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1134,6 +1134,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) } vcpu_e500->num_shared_tlb_pages = 0; + + kfree(vcpu_e500->shared_tlb_pages); vcpu_e500->shared_tlb_pages = NULL; } else { kfree(vcpu_e500->gtlb_arch); -- cgit v1.2.3-59-g8ed1b From 5bd1cf118533aba41b3fbd4834e6362a9237db71 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 22 Aug 2012 15:03:50 +0000 Subject: KVM: PPC: set IN_GUEST_MODE before checking requests Avoid a race as described in the code comment. Also remove a related smp_wmb() from booke's kvmppc_prepare_to_enter(). I can't see any reason for it, and the book3s_pr version doesn't have it. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 1 - arch/powerpc/kvm/powerpc.c | 14 +++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 69d047c22d20..3d1f35dc7862 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -674,7 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) out: vcpu->mode = OUTSIDE_GUEST_MODE; - smp_wmb(); return ret; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6002ea938a48..deb0d596d815 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -78,7 +78,16 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) break; } + vcpu->mode = IN_GUEST_MODE; + + /* + * Reading vcpu->requests must happen after setting vcpu->mode, + * so we don't miss a request because the requester sees + * OUTSIDE_GUEST_MODE and assumes we'll be checking requests + * before next entering the guest (and thus doesn't IPI). + */ smp_mb(); + if (vcpu->requests) { /* Make sure we process requests preemptable */ local_irq_enable(); @@ -111,11 +120,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) #endif kvm_guest_enter(); - - /* Going into guest context! Yay! */ - vcpu->mode = IN_GUEST_MODE; - smp_wmb(); - break; } -- cgit v1.2.3-59-g8ed1b From a136a8bdc02fc14625ac45ee846cc646fc46597e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:31:56 +0000 Subject: KVM: PPC: Book3S: Get/set guest SPRs using the GET/SET_ONE_REG interface This enables userspace to get and set various SPRs (special-purpose registers) using the KVM_[GS]ET_ONE_REG ioctls. With this, userspace can get and set all the SPRs that are part of the guest state, either through the KVM_[GS]ET_REGS ioctls, the KVM_[GS]ET_SREGS ioctls, or the KVM_[GS]ET_ONE_REG ioctls. The SPRs that are added here are: - DABR: Data address breakpoint register - DSCR: Data stream control register - PURR: Processor utilization of resources register - SPURR: Scaled PURR - DAR: Data address register - DSISR: Data storage interrupt status register - AMR: Authority mask register - UAMOR: User authority mask override register - MMCR0, MMCR1, MMCRA: Performance monitor unit control registers - PMC1..PMC8: Performance monitor unit counter registers In order to reduce code duplication between PR and HV KVM code, this moves the kvm_vcpu_ioctl_[gs]et_one_reg functions into book3s.c and centralizes the copying between user and kernel space there. The registers that are handled differently between PR and HV, and those that exist only in one flavor, are handled in kvmppc_[gs]et_one_reg() functions that are specific to each flavor. Signed-off-by: Paul Mackerras [agraf: minimal style fixes] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 19 ++++++++++ arch/powerpc/include/asm/kvm.h | 21 +++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 32 ++++++++++++++++ arch/powerpc/kvm/book3s.c | 68 ++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 76 ++++++++++++++++++++++++++++++++------ arch/powerpc/kvm/book3s_pr.c | 23 ++++++------ 6 files changed, 215 insertions(+), 24 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c84739e2c0f3..af9b7a06bcae 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1740,6 +1740,25 @@ registers, find a list below: PPC | KVM_REG_PPC_IAC4 | 64 PPC | KVM_REG_PPC_DAC1 | 64 PPC | KVM_REG_PPC_DAC2 | 64 + PPC | KVM_REG_PPC_DABR | 64 + PPC | KVM_REG_PPC_DSCR | 64 + PPC | KVM_REG_PPC_PURR | 64 + PPC | KVM_REG_PPC_SPURR | 64 + PPC | KVM_REG_PPC_DAR | 64 + PPC | KVM_REG_PPC_DSISR | 32 + PPC | KVM_REG_PPC_AMR | 64 + PPC | KVM_REG_PPC_UAMOR | 64 + PPC | KVM_REG_PPC_MMCR0 | 64 + PPC | KVM_REG_PPC_MMCR1 | 64 + PPC | KVM_REG_PPC_MMCRA | 64 + PPC | KVM_REG_PPC_PMC1 | 32 + PPC | KVM_REG_PPC_PMC2 | 32 + PPC | KVM_REG_PPC_PMC3 | 32 + PPC | KVM_REG_PPC_PMC4 | 32 + PPC | KVM_REG_PPC_PMC5 | 32 + PPC | KVM_REG_PPC_PMC6 | 32 + PPC | KVM_REG_PPC_PMC7 | 32 + PPC | KVM_REG_PPC_PMC8 | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 3c14202a3c84..9557576a5325 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -338,5 +338,26 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5) #define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6) #define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7) +#define KVM_REG_PPC_DABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8) +#define KVM_REG_PPC_DSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9) +#define KVM_REG_PPC_PURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa) +#define KVM_REG_PPC_SPURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb) +#define KVM_REG_PPC_DAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc) +#define KVM_REG_PPC_DSISR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd) +#define KVM_REG_PPC_AMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe) +#define KVM_REG_PPC_UAMOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf) + +#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10) +#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11) +#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12) + +#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18) +#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19) +#define KVM_REG_PPC_PMC3 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a) +#define KVM_REG_PPC_PMC4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b) +#define KVM_REG_PPC_PMC5 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c) +#define KVM_REG_PPC_PMC6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d) +#define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e) +#define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f) #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3fb980d293e5..709f0ddae1f1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_BOOK3S #include #else @@ -196,6 +197,35 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) return r; } +union kvmppc_one_reg { + u32 wval; + u64 dval; +}; + +#define one_reg_size(id) \ + (1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + +#define get_reg_val(id, reg) ({ \ + union kvmppc_one_reg __u; \ + switch (one_reg_size(id)) { \ + case 4: __u.wval = (reg); break; \ + case 8: __u.dval = (reg); break; \ + default: BUG(); \ + } \ + __u; \ +}) + + +#define set_reg_val(id, val) ({ \ + u64 __v; \ + switch (one_reg_size(id)) { \ + case 4: __v = (val).wval; break; \ + case 8: __v = (val).dval; break; \ + default: BUG(); \ + } \ + __v; \ +}) + void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); @@ -204,6 +234,8 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg); +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index e94666566fa9..a5af28fc3a8f 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -485,6 +485,74 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -ENOTSUPP; } +int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + r = kvmppc_get_one_reg(vcpu, reg->id, &val); + + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + case KVM_REG_PPC_DAR: + val = get_reg_val(reg->id, vcpu->arch.shared->dar); + break; + case KVM_REG_PPC_DSISR: + val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); + break; + default: + r = -EINVAL; + break; + } + } + if (r) + return r; + + if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size)) + r = -EFAULT; + + return r; +} + +int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +{ + int r; + union kvmppc_one_reg val; + int size; + + size = one_reg_size(reg->id); + if (size > sizeof(val)) + return -EINVAL; + + if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) + return -EFAULT; + + r = kvmppc_set_one_reg(vcpu, reg->id, &val); + + if (r == -EINVAL) { + r = 0; + switch (reg->id) { + case KVM_REG_PPC_DAR: + vcpu->arch.shared->dar = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_DSISR: + vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); + break; + default: + r = -EINVAL; + break; + } + } + + return r; +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c9ae3148c981..1cc6b77fa63d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -544,36 +544,88 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; + long int i; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = put_user(0, (u64 __user *)reg->addr); + *val = get_reg_val(id, 0); + break; + case KVM_REG_PPC_DABR: + *val = get_reg_val(id, vcpu->arch.dabr); + break; + case KVM_REG_PPC_DSCR: + *val = get_reg_val(id, vcpu->arch.dscr); + break; + case KVM_REG_PPC_PURR: + *val = get_reg_val(id, vcpu->arch.purr); + break; + case KVM_REG_PPC_SPURR: + *val = get_reg_val(id, vcpu->arch.spurr); + break; + case KVM_REG_PPC_AMR: + *val = get_reg_val(id, vcpu->arch.amr); + break; + case KVM_REG_PPC_UAMOR: + *val = get_reg_val(id, vcpu->arch.uamor); + break; + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + i = id - KVM_REG_PPC_MMCR0; + *val = get_reg_val(id, vcpu->arch.mmcr[i]); + break; + case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: + i = id - KVM_REG_PPC_PMC1; + *val = get_reg_val(id, vcpu->arch.pmc[i]); break; default: + r = -EINVAL; break; } return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; + long int i; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - { - u64 hior; /* Only allow this to be set to zero */ - r = get_user(hior, (u64 __user *)reg->addr); - if (!r && (hior != 0)) + if (set_reg_val(id, *val)) r = -EINVAL; break; - } + case KVM_REG_PPC_DABR: + vcpu->arch.dabr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DSCR: + vcpu->arch.dscr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PURR: + vcpu->arch.purr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_SPURR: + vcpu->arch.spurr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_AMR: + vcpu->arch.amr = set_reg_val(id, *val); + break; + case KVM_REG_PPC_UAMOR: + vcpu->arch.uamor = set_reg_val(id, *val); + break; + case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA: + i = id - KVM_REG_PPC_MMCR0; + vcpu->arch.mmcr[i] = set_reg_val(id, *val); + break; + case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8: + i = id - KVM_REG_PPC_PMC1; + vcpu->arch.pmc[i] = set_reg_val(id, *val); + break; default: + r = -EINVAL; break; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bf3ec5d66d8c..c81109f3a376 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -945,34 +945,33 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, return 0; } -int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = copy_to_user((u64 __user *)(long)reg->addr, - &to_book3s(vcpu)->hior, sizeof(u64)); + *val = get_reg_val(id, to_book3s(vcpu)->hior); break; default: + r = -EINVAL; break; } return r; } -int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) +int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = -EINVAL; + int r = 0; - switch (reg->id) { + switch (id) { case KVM_REG_PPC_HIOR: - r = copy_from_user(&to_book3s(vcpu)->hior, - (u64 __user *)(long)reg->addr, sizeof(u64)); - if (!r) - to_book3s(vcpu)->hior_explicit = true; + to_book3s(vcpu)->hior = set_reg_val(id, *val); + to_book3s(vcpu)->hior_explicit = true; break; default: + r = -EINVAL; break; } -- cgit v1.2.3-59-g8ed1b From a8bd19ef4dd49f0eef86a4a8eb43d60f967236b8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:32:30 +0000 Subject: KVM: PPC: Book3S: Get/set guest FP regs using the GET/SET_ONE_REG interface This enables userspace to get and set all the guest floating-point state using the KVM_[GS]ET_ONE_REG ioctls. The floating-point state includes all of the traditional floating-point registers and the FPSCR (floating point status/control register), all the VMX/Altivec vector registers and the VSCR (vector status/control register), and on POWER7, the vector-scalar registers (note that each FP register is the high-order half of the corresponding VSR). Most of these are implemented in common Book 3S code, except for VSX on POWER7. Because HV and PR differ in how they store the FP and VSX registers on POWER7, the code for these cases is not common. On POWER7, the FP registers are the upper halves of the VSX registers vsr0 - vsr31. PR KVM stores vsr0 - vsr31 in two halves, with the upper halves in the arch.fpr[] array and the lower halves in the arch.vsr[] array, whereas HV KVM on POWER7 stores the whole VSX register in arch.vsr[]. Signed-off-by: Paul Mackerras [agraf: fix whitespace, vsx compilation] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 11 +++++++++ arch/powerpc/include/asm/kvm.h | 20 ++++++++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 2 ++ arch/powerpc/kvm/book3s.c | 48 ++++++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 42 +++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_pr.c | 26 +++++++++++++++++++++ 6 files changed, 149 insertions(+) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index af9b7a06bcae..71dc7e2ec8eb 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1759,6 +1759,17 @@ registers, find a list below: PPC | KVM_REG_PPC_PMC6 | 32 PPC | KVM_REG_PPC_PMC7 | 32 PPC | KVM_REG_PPC_PMC8 | 32 + PPC | KVM_REG_PPC_FPR0 | 64 + ... + PPC | KVM_REG_PPC_FPR31 | 64 + PPC | KVM_REG_PPC_VR0 | 128 + ... + PPC | KVM_REG_PPC_VR31 | 128 + PPC | KVM_REG_PPC_VSR0 | 128 + ... + PPC | KVM_REG_PPC_VSR31 | 128 + PPC | KVM_REG_PPC_FPSCR | 64 + PPC | KVM_REG_PPC_VSCR | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 9557576a5325..1466975129c7 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -360,4 +360,24 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e) #define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f) +/* 32 floating-point registers */ +#define KVM_REG_PPC_FPR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20) +#define KVM_REG_PPC_FPR(n) (KVM_REG_PPC_FPR0 + (n)) +#define KVM_REG_PPC_FPR31 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f) + +/* 32 VMX/Altivec vector registers */ +#define KVM_REG_PPC_VR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40) +#define KVM_REG_PPC_VR(n) (KVM_REG_PPC_VR0 + (n)) +#define KVM_REG_PPC_VR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f) + +/* 32 double-width FP registers for VSX */ +/* High-order halves overlap with FP regs */ +#define KVM_REG_PPC_VSR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60) +#define KVM_REG_PPC_VSR(n) (KVM_REG_PPC_VSR0 + (n)) +#define KVM_REG_PPC_VSR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f) + +/* FP and vector status/control registers */ +#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) +#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 709f0ddae1f1..51604a16c8a5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -200,6 +200,8 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) union kvmppc_one_reg { u32 wval; u64 dval; + vector128 vval; + u64 vsxval[2]; }; #define one_reg_size(id) \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a5af28fc3a8f..a4b645285240 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -490,6 +490,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r; union kvmppc_one_reg val; int size; + long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -506,6 +507,29 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DSISR: val = get_reg_val(reg->id, vcpu->arch.shared->dsisr); break; + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + i = reg->id - KVM_REG_PPC_FPR0; + val = get_reg_val(reg->id, vcpu->arch.fpr[i]); + break; + case KVM_REG_PPC_FPSCR: + val = get_reg_val(reg->id, vcpu->arch.fpscr); + break; +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0]; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; @@ -525,6 +549,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int r; union kvmppc_one_reg val; int size; + long int i; size = one_reg_size(reg->id); if (size > sizeof(val)) @@ -544,6 +569,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_DSISR: vcpu->arch.shared->dsisr = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + i = reg->id - KVM_REG_PPC_FPR0; + vcpu->arch.fpr[i] = set_reg_val(reg->id, val); + break; + case KVM_REG_PPC_FPSCR: + vcpu->arch.fpscr = set_reg_val(reg->id, val); + break; +#ifdef CONFIG_ALTIVEC + case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval; + break; + case KVM_REG_PPC_VSCR: + if (!cpu_has_feature(CPU_FTR_ALTIVEC)) { + r = -ENXIO; + break; + } + vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val); + break; +#endif /* CONFIG_ALTIVEC */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1cc6b77fa63d..94ec0e30969d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -579,6 +579,27 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; *val = get_reg_val(id, vcpu->arch.pmc[i]); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + /* VSX => FP reg i is stored in arch.vsr[2*i] */ + long int i = id - KVM_REG_PPC_FPR0; + *val = get_reg_val(id, vcpu->arch.vsr[2 * i]); + } else { + /* let generic code handle it */ + r = -EINVAL; + } + break; + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = id - KVM_REG_PPC_VSR0; + val->vsxval[0] = vcpu->arch.vsr[2 * i]; + val->vsxval[1] = vcpu->arch.vsr[2 * i + 1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -624,6 +645,27 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) i = id - KVM_REG_PPC_PMC1; vcpu->arch.pmc[i] = set_reg_val(id, *val); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + /* VSX => FP reg i is stored in arch.vsr[2*i] */ + long int i = id - KVM_REG_PPC_FPR0; + vcpu->arch.vsr[2 * i] = set_reg_val(id, *val); + } else { + /* let generic code handle it */ + r = -EINVAL; + } + break; + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: + if (cpu_has_feature(CPU_FTR_VSX)) { + long int i = id - KVM_REG_PPC_VSR0; + vcpu->arch.vsr[2 * i] = val->vsxval[0]; + vcpu->arch.vsr[2 * i + 1] = val->vsxval[1]; + } else { + r = -ENXIO; + } + break; +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index c81109f3a376..b853696b6d8e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -953,6 +953,19 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) case KVM_REG_PPC_HIOR: *val = get_reg_val(id, to_book3s(vcpu)->hior); break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { + long int i = id - KVM_REG_PPC_VSR0; + + if (!cpu_has_feature(CPU_FTR_VSX)) { + r = -ENXIO; + break; + } + val->vsxval[0] = vcpu->arch.fpr[i]; + val->vsxval[1] = vcpu->arch.vsr[i]; + break; + } +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; @@ -970,6 +983,19 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) to_book3s(vcpu)->hior = set_reg_val(id, *val); to_book3s(vcpu)->hior_explicit = true; break; +#ifdef CONFIG_VSX + case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: { + long int i = id - KVM_REG_PPC_VSR0; + + if (!cpu_has_feature(CPU_FTR_VSX)) { + r = -ENXIO; + break; + } + vcpu->arch.fpr[i] = val->vsxval[0]; + vcpu->arch.vsr[i] = val->vsxval[1]; + break; + } +#endif /* CONFIG_VSX */ default: r = -EINVAL; break; -- cgit v1.2.3-59-g8ed1b From 55b665b0263ae88a776071306ef1eee4b769016b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 25 Sep 2012 20:33:06 +0000 Subject: KVM: PPC: Book3S HV: Provide a way for userspace to get/set per-vCPU areas The PAPR paravirtualization interface lets guests register three different types of per-vCPU buffer areas in its memory for communication with the hypervisor. These are called virtual processor areas (VPAs). Currently the hypercalls to register and unregister VPAs are handled by KVM in the kernel, and userspace has no way to know about or save and restore these registrations across a migration. This adds "register" codes for these three areas that userspace can use with the KVM_GET/SET_ONE_REG ioctls to see what addresses have been registered, and to register or unregister them. This will be needed for guest hibernation and migration, and is also needed so that userspace can unregister them on reset (otherwise we corrupt guest memory after reboot by writing to the VPAs registered by the previous kernel). The "register" for the VPA is a 64-bit value containing the address, since the length of the VPA is fixed. The "registers" for the SLB shadow buffer and dispatch trace log (DTL) are 128 bits long, consisting of the guest physical address in the high (first) 64 bits and the length in the low 64 bits. This also fixes a bug where we were calling init_vpa unconditionally, leading to an oops when unregistering the VPA. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 3 ++ arch/powerpc/include/asm/kvm.h | 6 ++++ arch/powerpc/include/asm/kvm_ppc.h | 4 +++ arch/powerpc/kvm/book3s_hv.c | 64 +++++++++++++++++++++++++++++++++++++- 4 files changed, 76 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 71dc7e2ec8eb..e726d76eeebb 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1770,6 +1770,9 @@ registers, find a list below: PPC | KVM_REG_PPC_VSR31 | 128 PPC | KVM_REG_PPC_FPSCR | 64 PPC | KVM_REG_PPC_VSCR | 32 + PPC | KVM_REG_PPC_VPA_ADDR | 64 + PPC | KVM_REG_PPC_VPA_SLB | 128 + PPC | KVM_REG_PPC_VPA_DTL | 128 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 1466975129c7..b89ae4db45ce 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -380,4 +380,10 @@ struct kvm_book3e_206_tlb_params { #define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80) #define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81) +/* Virtual processor areas */ +/* For SLB & DTL, address in high (first) half, length in low half */ +#define KVM_REG_PPC_VPA_ADDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82) +#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83) +#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 51604a16c8a5..609cca3e9426 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -202,6 +202,10 @@ union kvmppc_one_reg { u64 dval; vector128 vval; u64 vsxval[2]; + struct { + u64 addr; + u64 length; + } vpaval; }; #define one_reg_size(id) \ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 94ec0e30969d..9a15da76e56b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -143,6 +143,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) vpa->yield_count = 1; } +static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, + unsigned long addr, unsigned long len) +{ + /* check address is cacheline aligned */ + if (addr & (L1_CACHE_BYTES - 1)) + return -EINVAL; + spin_lock(&vcpu->arch.vpa_update_lock); + if (v->next_gpa != addr || v->len != len) { + v->next_gpa = addr; + v->len = addr ? len : 0; + v->update_pending = 1; + } + spin_unlock(&vcpu->arch.vpa_update_lock); + return 0; +} + /* Length for a per-processor buffer is passed in at offset 4 in the buffer */ struct reg_vpa { u32 dummy; @@ -321,7 +337,8 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vpa_update_lock); if (vcpu->arch.vpa.update_pending) { kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); - init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); + if (vcpu->arch.vpa.pinned_addr) + init_vpa(vcpu, vcpu->arch.vpa.pinned_addr); } if (vcpu->arch.dtl.update_pending) { kvmppc_update_vpa(vcpu, &vcpu->arch.dtl); @@ -600,6 +617,23 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) } break; #endif /* CONFIG_VSX */ + case KVM_REG_PPC_VPA_ADDR: + spin_lock(&vcpu->arch.vpa_update_lock); + *val = get_reg_val(id, vcpu->arch.vpa.next_gpa); + spin_unlock(&vcpu->arch.vpa_update_lock); + break; + case KVM_REG_PPC_VPA_SLB: + spin_lock(&vcpu->arch.vpa_update_lock); + val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa; + val->vpaval.length = vcpu->arch.slb_shadow.len; + spin_unlock(&vcpu->arch.vpa_update_lock); + break; + case KVM_REG_PPC_VPA_DTL: + spin_lock(&vcpu->arch.vpa_update_lock); + val->vpaval.addr = vcpu->arch.dtl.next_gpa; + val->vpaval.length = vcpu->arch.dtl.len; + spin_unlock(&vcpu->arch.vpa_update_lock); + break; default: r = -EINVAL; break; @@ -612,6 +646,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { int r = 0; long int i; + unsigned long addr, len; switch (id) { case KVM_REG_PPC_HIOR: @@ -666,6 +701,33 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) } break; #endif /* CONFIG_VSX */ + case KVM_REG_PPC_VPA_ADDR: + addr = set_reg_val(id, *val); + r = -EINVAL; + if (!addr && (vcpu->arch.slb_shadow.next_gpa || + vcpu->arch.dtl.next_gpa)) + break; + r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca)); + break; + case KVM_REG_PPC_VPA_SLB: + addr = val->vpaval.addr; + len = val->vpaval.length; + r = -EINVAL; + if (addr && !vcpu->arch.vpa.next_gpa) + break; + r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len); + break; + case KVM_REG_PPC_VPA_DTL: + addr = val->vpaval.addr; + len = val->vpaval.length; + r = -EINVAL; + if (len < sizeof(struct dtl_entry)) + break; + if (addr && !vcpu->arch.vpa.next_gpa) + break; + len -= len % sizeof(struct dtl_entry); + r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); + break; default: r = -EINVAL; break; -- cgit v1.2.3-59-g8ed1b From 12ecd9570d8941c15602a11725ec9b0ede48d6c2 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 4 Aug 2012 23:52:33 +0000 Subject: arch/powerpc/kvm/e500_tlb.c: fix error return code Convert a 0 error return code to a negative one, as returned elsewhere in the function. A new label is also added to avoid freeing things that are known to not yet be allocated. A simplified version of the semantic match that finds the first problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e,e1,e2,e3,e4,x; @@ ( if (\(ret != 0\|ret < 0\) || ...) { ... return ...; } | ret = 0 ) ... when != ret = e1 *x = \(kmalloc\|kzalloc\|kcalloc\|devm_kzalloc\|ioremap\|ioremap_nocache\|devm_ioremap\|devm_ioremap_nocache\)(...); ... when != x = e2 when != ret = e3 *if (x == NULL || ...) { ... when != ret = e4 * return ret; } // Signed-off-by: Julia Lawall Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 641f97847b95..c73389477d17 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -1233,21 +1233,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, } virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL); - if (!virt) + if (!virt) { + ret = -ENOMEM; goto err_put_page; + } privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0], GFP_KERNEL); privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1], GFP_KERNEL); - if (!privs[0] || !privs[1]) - goto err_put_page; + if (!privs[0] || !privs[1]) { + ret = -ENOMEM; + goto err_privs; + } g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1], GFP_KERNEL); - if (!g2h_bitmap) - goto err_put_page; + if (!g2h_bitmap) { + ret = -ENOMEM; + goto err_privs; + } free_gtlb(vcpu_e500); @@ -1287,10 +1293,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, kvmppc_recalc_tlb1map_range(vcpu_e500); return 0; -err_put_page: +err_privs: kfree(privs[0]); kfree(privs[1]); +err_put_page: for (i = 0; i < num_pages; i++) put_page(pages[i]); -- cgit v1.2.3-59-g8ed1b From 8ca40a70a70988c0bdea106c894843f763ca2989 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 14 Oct 2012 23:10:18 -0400 Subject: KVM: Take kvm instead of vcpu to mmu_notifier_retry The mmu_notifier_retry is not specific to any vcpu (and never will be) so only take struct kvm as a parameter. The motivation is the ARM mmu code that needs to call this from somewhere where we long let go of the vcpu pointer. Signed-off-by: Christoffer Dall Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/paging_tmpl.h | 2 +- include/linux/kvm_host.h | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7a4aae99ac5b..2a89a36e7263 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -710,7 +710,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Check if we might have been invalidated; let the guest retry if so */ ret = RESUME_GUEST; - if (mmu_notifier_retry(vcpu, mmu_seq)) { + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) { unlock_rmap(rmap); goto out_unlock; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 9955216477a4..5e06e3153888 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -297,7 +297,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ if (kvm->arch.using_mmu_notifiers && - mmu_notifier_retry(vcpu, mmu_seq)) { + mmu_notifier_retry(vcpu->kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3d5ca7939380..6f78fa3a4706 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2886,7 +2886,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, return r; spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu, mmu_seq)) + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; kvm_mmu_free_some_pages(vcpu); if (likely(!force_pt_level)) @@ -3355,7 +3355,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, return r; spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu, mmu_seq)) + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; kvm_mmu_free_some_pages(vcpu); if (likely(!force_pt_level)) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index f887e4cfc1fe..d17decaf1db9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -565,7 +565,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, return r; spin_lock(&vcpu->kvm->mmu_lock); - if (mmu_notifier_retry(vcpu, mmu_seq)) + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6afc5be2615e..82e2c783a21e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -841,9 +841,9 @@ extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) -static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq) +static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) { - if (unlikely(vcpu->kvm->mmu_notifier_count)) + if (unlikely(kvm->mmu_notifier_count)) return 1; /* * Ensure the read of mmu_notifier_count happens before the read @@ -856,7 +856,7 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se * can't rely on kvm->mmu_lock to keep things ordered. */ smp_rmb(); - if (vcpu->kvm->mmu_notifier_seq != mmu_seq) + if (kvm->mmu_notifier_seq != mmu_seq) return 1; return 0; } -- cgit v1.2.3-59-g8ed1b From 81c52c56e2b43589091ee29038bcf793d3f184ab Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 16 Oct 2012 20:10:59 +0800 Subject: KVM: do not treat noslot pfn as a error pfn This patch filters noslot pfn out from error pfns based on Marcelo comment: noslot pfn is not a error pfn After this patch, - is_noslot_pfn indicates that the gfn is not in slot - is_error_pfn indicates that the gfn is in slot but the error is occurred when translate the gfn to pfn - is_error_noslot_pfn indicates that the pfn either it is error pfns or it is noslot pfn And is_invalid_pfn can be removed, it makes the code more clean Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/powerpc/kvm/book3s_32_mmu_host.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +- arch/powerpc/kvm/e500_tlb.c | 2 +- arch/x86/kvm/mmu.c | 4 ++-- arch/x86/kvm/paging_tmpl.h | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 28 ++++++++++++++++++++-------- virt/kvm/iommu.c | 4 ++-- virt/kvm/kvm_main.c | 6 +++--- 9 files changed, 32 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index d1107a9b5d13..00e619bf608e 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -155,7 +155,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) /* Get host physical address for gpa */ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); - if (is_error_pfn(hpaddr)) { + if (is_error_noslot_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); r = -EINVAL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index d0205a545a81..ead58e317294 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -93,7 +93,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) /* Get host physical address for gpa */ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); - if (is_error_pfn(hpaddr)) { + if (is_error_noslot_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); r = -EINVAL; goto out; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c73389477d17..6305ee692ef7 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -524,7 +524,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (likely(!pfnmap)) { unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); pfn = gfn_to_pfn_memslot(slot, gfn); - if (is_error_pfn(pfn)) { + if (is_error_noslot_pfn(pfn)) { printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", (long)gfn); return; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aabb1289ff04..b875a9ed9b8e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2699,7 +2699,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, * PT_PAGE_TABLE_LEVEL and there would be no adjustment done * here. */ - if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) && + if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompound(pfn_to_page(pfn)) && !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) { @@ -2733,7 +2733,7 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, bool ret = true; /* The pfn is invalid, report the error! */ - if (unlikely(is_invalid_pfn(pfn))) { + if (unlikely(is_error_pfn(pfn))) { *ret_val = kvm_handle_bad_page(vcpu, gfn, pfn); goto exit; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d17decaf1db9..891eb6d93b8b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -323,7 +323,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, protect_clean_gpte(&pte_access, gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, no_dirty_log && (pte_access & ACC_WRITE_MASK)); - if (is_invalid_pfn(pfn)) + if (is_error_pfn(pfn)) return false; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6e5f069bee30..49fa1f0e59bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4504,7 +4504,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) * instruction -> ... */ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); - if (!is_error_pfn(pfn)) { + if (!is_error_noslot_pfn(pfn)) { kvm_release_pfn_clean(pfn); return true; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 82e2c783a21e..99a47627e046 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -58,28 +58,40 @@ /* * For the normal pfn, the highest 12 bits should be zero, - * so we can mask these bits to indicate the error. + * so we can mask bit 62 ~ bit 52 to indicate the error pfn, + * mask bit 63 to indicate the noslot pfn. */ -#define KVM_PFN_ERR_MASK (0xfffULL << 52) +#define KVM_PFN_ERR_MASK (0x7ffULL << 52) +#define KVM_PFN_ERR_NOSLOT_MASK (0xfffULL << 52) +#define KVM_PFN_NOSLOT (0x1ULL << 63) #define KVM_PFN_ERR_FAULT (KVM_PFN_ERR_MASK) #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) -#define KVM_PFN_ERR_BAD (KVM_PFN_ERR_MASK + 2) -#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 3) +#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) +/* + * error pfns indicate that the gfn is in slot but faild to + * translate it to pfn on host. + */ static inline bool is_error_pfn(pfn_t pfn) { return !!(pfn & KVM_PFN_ERR_MASK); } -static inline bool is_noslot_pfn(pfn_t pfn) +/* + * error_noslot pfns indicate that the gfn can not be + * translated to pfn - it is not in slot or failed to + * translate it to pfn. + */ +static inline bool is_error_noslot_pfn(pfn_t pfn) { - return pfn == KVM_PFN_ERR_BAD; + return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK); } -static inline bool is_invalid_pfn(pfn_t pfn) +/* noslot pfn indicates that the gfn is not in slot. */ +static inline bool is_noslot_pfn(pfn_t pfn) { - return !is_noslot_pfn(pfn) && is_error_pfn(pfn); + return pfn == KVM_PFN_NOSLOT; } #define KVM_HVA_ERR_BAD (PAGE_OFFSET) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 18e1e30019e3..4a340cb23013 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -52,7 +52,7 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, end_gfn = gfn + (size >> PAGE_SHIFT); gfn += 1; - if (is_error_pfn(pfn)) + if (is_error_noslot_pfn(pfn)) return pfn; while (gfn < end_gfn) @@ -106,7 +106,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) * important because we unmap and unpin in 4kb steps later. */ pfn = kvm_pin_pages(slot, gfn, page_size); - if (is_error_pfn(pfn)) { + if (is_error_noslot_pfn(pfn)) { gfn += 1; continue; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index be70035fd42a..2fb73191801f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1208,7 +1208,7 @@ __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, return KVM_PFN_ERR_RO_FAULT; if (kvm_is_error_hva(addr)) - return KVM_PFN_ERR_BAD; + return KVM_PFN_NOSLOT; /* Do not map writable pfn in the readonly memslot. */ if (writable && memslot_is_readonly(slot)) { @@ -1290,7 +1290,7 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic); static struct page *kvm_pfn_to_page(pfn_t pfn) { - if (is_error_pfn(pfn)) + if (is_error_noslot_pfn(pfn)) return KVM_ERR_PTR_BAD_PAGE; if (kvm_is_mmio_pfn(pfn)) { @@ -1322,7 +1322,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) + if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn)) put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); -- cgit v1.2.3-59-g8ed1b From e43a028752fed049e4bd94ef895542f96d79fa74 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 6 Oct 2012 03:56:35 +0200 Subject: KVM: PPC: 44x: fix DCR read/write When remembering the direction of a DCR transaction, we should write to the same variable that we interpret on later when doing vcpu_run again. Signed-off-by: Alexander Graf Cc: stable@vger.kernel.org --- arch/powerpc/kvm/44x_emulate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c index 1a793c4c4a67..35ec0a8547da 100644 --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -46,6 +46,7 @@ static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) vcpu->run->dcr.dcrn = dcrn; vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); vcpu->run->dcr.is_write = 1; + vcpu->arch.dcr_is_write = 1; vcpu->arch.dcr_needed = 1; kvmppc_account_exit(vcpu, DCR_EXITS); return EMULATE_DO_DCR; @@ -80,6 +81,7 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) vcpu->run->dcr.dcrn = dcrn; vcpu->run->dcr.data = 0; vcpu->run->dcr.is_write = 0; + vcpu->arch.dcr_is_write = 0; vcpu->arch.io_gpr = rt; vcpu->arch.dcr_needed = 1; kvmppc_account_exit(vcpu, DCR_EXITS); -- cgit v1.2.3-59-g8ed1b From 388cf9ee3c751c3a4cf8776987143354d6d8c797 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 6 Oct 2012 23:19:01 +0200 Subject: KVM: PPC: Move mtspr/mfspr emulation into own functions The mtspr/mfspr emulation code became quite big over time. Move it into its own function so things stay more readable. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/emulate.c | 221 +++++++++++++++++++++++++-------------------- 1 file changed, 121 insertions(+), 100 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index ee04abaefe23..b0855e5d8905 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -131,6 +131,125 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb) return vcpu->arch.dec - jd; } +static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) +{ + enum emulation_result emulated = EMULATE_DONE; + ulong spr_val = kvmppc_get_gpr(vcpu, rs); + + switch (sprn) { + case SPRN_SRR0: + vcpu->arch.shared->srr0 = spr_val; + break; + case SPRN_SRR1: + vcpu->arch.shared->srr1 = spr_val; + break; + + /* XXX We need to context-switch the timebase for + * watchdog and FIT. */ + case SPRN_TBWL: break; + case SPRN_TBWU: break; + + case SPRN_MSSSR0: break; + + case SPRN_DEC: + vcpu->arch.dec = spr_val; + kvmppc_emulate_dec(vcpu); + break; + + case SPRN_SPRG0: + vcpu->arch.shared->sprg0 = spr_val; + break; + case SPRN_SPRG1: + vcpu->arch.shared->sprg1 = spr_val; + break; + case SPRN_SPRG2: + vcpu->arch.shared->sprg2 = spr_val; + break; + case SPRN_SPRG3: + vcpu->arch.shared->sprg3 = spr_val; + break; + + default: + emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, + spr_val); + if (emulated == EMULATE_FAIL) + printk(KERN_INFO "mtspr: unknown spr " + "0x%x\n", sprn); + break; + } + + kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); + + return emulated; +} + +static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) +{ + enum emulation_result emulated = EMULATE_DONE; + ulong spr_val = 0; + + switch (sprn) { + case SPRN_SRR0: + spr_val = vcpu->arch.shared->srr0; + break; + case SPRN_SRR1: + spr_val = vcpu->arch.shared->srr1; + break; + case SPRN_PVR: + spr_val = vcpu->arch.pvr; + break; + case SPRN_PIR: + spr_val = vcpu->vcpu_id; + break; + case SPRN_MSSSR0: + spr_val = 0; + break; + + /* Note: mftb and TBRL/TBWL are user-accessible, so + * the guest can always access the real TB anyways. + * In fact, we probably will never see these traps. */ + case SPRN_TBWL: + spr_val = get_tb() >> 32; + break; + case SPRN_TBWU: + spr_val = get_tb(); + break; + + case SPRN_SPRG0: + spr_val = vcpu->arch.shared->sprg0; + break; + case SPRN_SPRG1: + spr_val = vcpu->arch.shared->sprg1; + break; + case SPRN_SPRG2: + spr_val = vcpu->arch.shared->sprg2; + break; + case SPRN_SPRG3: + spr_val = vcpu->arch.shared->sprg3; + break; + /* Note: SPRG4-7 are user-readable, so we don't get + * a trap. */ + + case SPRN_DEC: + spr_val = kvmppc_get_dec(vcpu, get_tb()); + break; + default: + emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, + &spr_val); + if (unlikely(emulated == EMULATE_FAIL)) { + printk(KERN_INFO "mfspr: unknown spr " + "0x%x\n", sprn); + } + break; + } + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, rt, spr_val); + kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); + + return emulated; +} + /* XXX to do: * lhax * lhaux @@ -156,7 +275,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) int sprn = get_sprn(inst); enum emulation_result emulated = EMULATE_DONE; int advance = 1; - ulong spr_val = 0; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); @@ -236,62 +354,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_MFSPR: - switch (sprn) { - case SPRN_SRR0: - spr_val = vcpu->arch.shared->srr0; - break; - case SPRN_SRR1: - spr_val = vcpu->arch.shared->srr1; - break; - case SPRN_PVR: - spr_val = vcpu->arch.pvr; - break; - case SPRN_PIR: - spr_val = vcpu->vcpu_id; - break; - case SPRN_MSSSR0: - spr_val = 0; - break; - - /* Note: mftb and TBRL/TBWL are user-accessible, so - * the guest can always access the real TB anyways. - * In fact, we probably will never see these traps. */ - case SPRN_TBWL: - spr_val = get_tb() >> 32; - break; - case SPRN_TBWU: - spr_val = get_tb(); - break; - - case SPRN_SPRG0: - spr_val = vcpu->arch.shared->sprg0; - break; - case SPRN_SPRG1: - spr_val = vcpu->arch.shared->sprg1; - break; - case SPRN_SPRG2: - spr_val = vcpu->arch.shared->sprg2; - break; - case SPRN_SPRG3: - spr_val = vcpu->arch.shared->sprg3; - break; - /* Note: SPRG4-7 are user-readable, so we don't get - * a trap. */ - - case SPRN_DEC: - spr_val = kvmppc_get_dec(vcpu, get_tb()); - break; - default: - emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, - &spr_val); - if (unlikely(emulated == EMULATE_FAIL)) { - printk(KERN_INFO "mfspr: unknown spr " - "0x%x\n", sprn); - } - break; - } - kvmppc_set_gpr(vcpu, rt, spr_val); - kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS); + emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); break; case OP_31_XOP_STHX: @@ -308,49 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case OP_31_XOP_MTSPR: - spr_val = kvmppc_get_gpr(vcpu, rs); - switch (sprn) { - case SPRN_SRR0: - vcpu->arch.shared->srr0 = spr_val; - break; - case SPRN_SRR1: - vcpu->arch.shared->srr1 = spr_val; - break; - - /* XXX We need to context-switch the timebase for - * watchdog and FIT. */ - case SPRN_TBWL: break; - case SPRN_TBWU: break; - - case SPRN_MSSSR0: break; - - case SPRN_DEC: - vcpu->arch.dec = spr_val; - kvmppc_emulate_dec(vcpu); - break; - - case SPRN_SPRG0: - vcpu->arch.shared->sprg0 = spr_val; - break; - case SPRN_SPRG1: - vcpu->arch.shared->sprg1 = spr_val; - break; - case SPRN_SPRG2: - vcpu->arch.shared->sprg2 = spr_val; - break; - case SPRN_SPRG3: - vcpu->arch.shared->sprg3 = spr_val; - break; - - default: - emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, - spr_val); - if (emulated == EMULATE_FAIL) - printk(KERN_INFO "mtspr: unknown spr " - "0x%x\n", sprn); - break; - } - kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS); + emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; case OP_31_XOP_DCBI: -- cgit v1.2.3-59-g8ed1b From c99ec973a63e2249020d6d93a46d7572432da6a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 27 Oct 2012 19:26:14 +0200 Subject: PPC: ePAPR: Convert header to uapi The new uapi framework splits kernel internal and user space exported bits of header files more cleanly. Adjust the ePAPR header accordingly. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/epapr_hcalls.h | 55 +--------------- arch/powerpc/include/uapi/asm/Kbuild | 1 + arch/powerpc/include/uapi/asm/epapr_hcalls.h | 98 ++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+), 55 deletions(-) create mode 100644 arch/powerpc/include/uapi/asm/epapr_hcalls.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 13d6b7bf3b69..7e313f1ed183 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -34,6 +34,5 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h -header-y += epapr_hcalls.h generic-y += rwsem.h diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index b8d94459a929..58997afcd085 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -50,60 +50,7 @@ #ifndef _EPAPR_HCALLS_H #define _EPAPR_HCALLS_H -#define EV_BYTE_CHANNEL_SEND 1 -#define EV_BYTE_CHANNEL_RECEIVE 2 -#define EV_BYTE_CHANNEL_POLL 3 -#define EV_INT_SET_CONFIG 4 -#define EV_INT_GET_CONFIG 5 -#define EV_INT_SET_MASK 6 -#define EV_INT_GET_MASK 7 -#define EV_INT_IACK 9 -#define EV_INT_EOI 10 -#define EV_INT_SEND_IPI 11 -#define EV_INT_SET_TASK_PRIORITY 12 -#define EV_INT_GET_TASK_PRIORITY 13 -#define EV_DOORBELL_SEND 14 -#define EV_MSGSND 15 -#define EV_IDLE 16 - -/* vendor ID: epapr */ -#define EV_LOCAL_VENDOR_ID 0 /* for private use */ -#define EV_EPAPR_VENDOR_ID 1 -#define EV_FSL_VENDOR_ID 2 /* Freescale Semiconductor */ -#define EV_IBM_VENDOR_ID 3 /* IBM */ -#define EV_GHS_VENDOR_ID 4 /* Green Hills Software */ -#define EV_ENEA_VENDOR_ID 5 /* Enea */ -#define EV_WR_VENDOR_ID 6 /* Wind River Systems */ -#define EV_AMCC_VENDOR_ID 7 /* Applied Micro Circuits */ -#define EV_KVM_VENDOR_ID 42 /* KVM */ - -/* The max number of bytes that a byte channel can send or receive per call */ -#define EV_BYTE_CHANNEL_MAX_BYTES 16 - - -#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) -#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) - -/* epapr return codes */ -#define EV_SUCCESS 0 -#define EV_EPERM 1 /* Operation not permitted */ -#define EV_ENOENT 2 /* Entry Not Found */ -#define EV_EIO 3 /* I/O error occured */ -#define EV_EAGAIN 4 /* The operation had insufficient - * resources to complete and should be - * retried - */ -#define EV_ENOMEM 5 /* There was insufficient memory to - * complete the operation */ -#define EV_EFAULT 6 /* Bad guest address */ -#define EV_ENODEV 7 /* No such device */ -#define EV_EINVAL 8 /* An argument supplied to the hcall - was out of range or invalid */ -#define EV_INTERNAL 9 /* An internal error occured */ -#define EV_CONFIG 10 /* A configuration error was detected */ -#define EV_INVALID_STATE 11 /* The object is in an invalid state */ -#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ -#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ +#include #ifndef __ASSEMBLY__ #include diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index baebb3da1d44..e6b5be86e4fa 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,3 +1,4 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm +header-y += epapr_hcalls.h diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h new file mode 100644 index 000000000000..046c79364f83 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h @@ -0,0 +1,98 @@ +/* + * ePAPR hcall interface + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * + * Author: Timur Tabi + * + * This file is provided under a dual BSD/GPL license. When using or + * redistributing this file, you may do so under either license. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _UAPI__EPAPR_HCALLS_H +#define _UAPI__EPAPR_HCALLS_H + +#define EV_BYTE_CHANNEL_SEND 1 +#define EV_BYTE_CHANNEL_RECEIVE 2 +#define EV_BYTE_CHANNEL_POLL 3 +#define EV_INT_SET_CONFIG 4 +#define EV_INT_GET_CONFIG 5 +#define EV_INT_SET_MASK 6 +#define EV_INT_GET_MASK 7 +#define EV_INT_IACK 9 +#define EV_INT_EOI 10 +#define EV_INT_SEND_IPI 11 +#define EV_INT_SET_TASK_PRIORITY 12 +#define EV_INT_GET_TASK_PRIORITY 13 +#define EV_DOORBELL_SEND 14 +#define EV_MSGSND 15 +#define EV_IDLE 16 + +/* vendor ID: epapr */ +#define EV_LOCAL_VENDOR_ID 0 /* for private use */ +#define EV_EPAPR_VENDOR_ID 1 +#define EV_FSL_VENDOR_ID 2 /* Freescale Semiconductor */ +#define EV_IBM_VENDOR_ID 3 /* IBM */ +#define EV_GHS_VENDOR_ID 4 /* Green Hills Software */ +#define EV_ENEA_VENDOR_ID 5 /* Enea */ +#define EV_WR_VENDOR_ID 6 /* Wind River Systems */ +#define EV_AMCC_VENDOR_ID 7 /* Applied Micro Circuits */ +#define EV_KVM_VENDOR_ID 42 /* KVM */ + +/* The max number of bytes that a byte channel can send or receive per call */ +#define EV_BYTE_CHANNEL_MAX_BYTES 16 + + +#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) +#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) + +/* epapr return codes */ +#define EV_SUCCESS 0 +#define EV_EPERM 1 /* Operation not permitted */ +#define EV_ENOENT 2 /* Entry Not Found */ +#define EV_EIO 3 /* I/O error occured */ +#define EV_EAGAIN 4 /* The operation had insufficient + * resources to complete and should be + * retried + */ +#define EV_ENOMEM 5 /* There was insufficient memory to + * complete the operation */ +#define EV_EFAULT 6 /* Bad guest address */ +#define EV_ENODEV 7 /* No such device */ +#define EV_EINVAL 8 /* An argument supplied to the hcall + was out of range or invalid */ +#define EV_INTERNAL 9 /* An internal error occured */ +#define EV_CONFIG 10 /* A configuration error was detected */ +#define EV_INVALID_STATE 11 /* The object is in an invalid state */ +#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ +#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ + +#endif /* _UAPI__EPAPR_HCALLS_H */ -- cgit v1.2.3-59-g8ed1b From 512691d4907d7cf4b8d05c6f8572d1fa60ccec20 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:15:41 +0000 Subject: KVM: PPC: Book3S HV: Allow KVM guests to stop secondary threads coming online When a Book3S HV KVM guest is running, we need the host to be in single-thread mode, that is, all of the cores (or at least all of the cores where the KVM guest could run) to be running only one active hardware thread. This is because of the hardware restriction in POWER processors that all of the hardware threads in the core must be in the same logical partition. Complying with this restriction is much easier if, from the host kernel's point of view, only one hardware thread is active. This adds two hooks in the SMP hotplug code to allow the KVM code to make sure that secondary threads (i.e. hardware threads other than thread 0) cannot come online while any KVM guest exists. The KVM code still has to check that any core where it runs a guest has the secondary threads offline, but having done that check it can now be sure that they will not come online while the guest is running. Signed-off-by: Paul Mackerras Acked-by: Benjamin Herrenschmidt Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/smp.h | 8 ++++++++ arch/powerpc/kernel/smp.c | 46 ++++++++++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 12 +++++++++-- 3 files changed, 64 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ebc24dc5b1a1..b625a1a9ad16 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -66,6 +66,14 @@ void generic_cpu_die(unsigned int cpu); void generic_mach_cpu_die(void); void generic_set_cpu_dead(unsigned int cpu); int generic_check_cpu_restart(unsigned int cpu); + +extern void inhibit_secondary_onlining(void); +extern void uninhibit_secondary_onlining(void); + +#else /* HOTPLUG_CPU */ +static inline void inhibit_secondary_onlining(void) {} +static inline void uninhibit_secondary_onlining(void) {} + #endif #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8d4214afc21d..c4f420c5fc1b 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -417,6 +417,45 @@ int generic_check_cpu_restart(unsigned int cpu) { return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; } + +static atomic_t secondary_inhibit_count; + +/* + * Don't allow secondary CPU threads to come online + */ +void inhibit_secondary_onlining(void) +{ + /* + * This makes secondary_inhibit_count stable during cpu + * online/offline operations. + */ + get_online_cpus(); + + atomic_inc(&secondary_inhibit_count); + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(inhibit_secondary_onlining); + +/* + * Allow secondary CPU threads to come online again + */ +void uninhibit_secondary_onlining(void) +{ + get_online_cpus(); + atomic_dec(&secondary_inhibit_count); + put_online_cpus(); +} +EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining); + +static int secondaries_inhibited(void) +{ + return atomic_read(&secondary_inhibit_count); +} + +#else /* HOTPLUG_CPU */ + +#define secondaries_inhibited() 0 + #endif static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle) @@ -435,6 +474,13 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc, c; + /* + * Don't allow secondary threads to come online if inhibited + */ + if (threads_per_core > 1 && secondaries_inhibited() && + cpu % threads_per_core != 0) + return -EBUSY; + if (smp_ops == NULL || (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu))) return -EINVAL; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9a15da76e56b..c5ddf048e19e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1016,8 +1017,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) /* * Make sure we are running on thread 0, and that * secondary threads are offline. - * XXX we should also block attempts to bring any - * secondary threads online. */ if (threads_per_core > 1 && !on_primary_thread()) { list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) @@ -1730,11 +1729,20 @@ int kvmppc_core_init_vm(struct kvm *kvm) kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206); spin_lock_init(&kvm->arch.slot_phys_lock); + + /* + * Don't allow secondary CPU threads to come online + * while any KVM VMs exist. + */ + inhibit_secondary_onlining(); + return 0; } void kvmppc_core_destroy_vm(struct kvm *kvm) { + uninhibit_secondary_onlining(); + if (kvm->arch.rma) { kvm_release_rma(kvm->arch.rma); kvm->arch.rma = NULL; -- cgit v1.2.3-59-g8ed1b From 7b444c6710c6c4994e31eb19216ce055836e65c4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:16:14 +0000 Subject: KVM: PPC: Book3S HV: Fix some races in starting secondary threads Subsequent patches implementing in-kernel XICS emulation will make it possible for IPIs to arrive at secondary threads at arbitrary times. This fixes some races in how we start the secondary threads, which if not fixed could lead to occasional crashes of the host kernel. This makes sure that (a) we have grabbed all the secondary threads, and verified that they are no longer in the kernel, before we start any thread, (b) that the secondary thread loads its vcpu pointer after clearing the IPI that woke it up (so we don't miss a wakeup), and (c) that the secondary thread clears its vcpu pointer before incrementing the nap count. It also removes unnecessary setting of the vcpu and vcore pointers in the paca in kvmppc_core_vcpu_load. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 41 +++++++++++++++++++-------------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 ++++++--- 2 files changed, 32 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c5ddf048e19e..77dec0f8a030 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -64,8 +64,6 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; - local_paca->kvm_hstate.kvm_vcpu = vcpu; - local_paca->kvm_hstate.kvm_vcore = vc; if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) vc->stolen_tb += mftb() - vc->preempt_tb; } @@ -880,6 +878,7 @@ static int kvmppc_grab_hwthread(int cpu) /* Ensure the thread won't go into the kernel if it wakes */ tpaca->kvm_hstate.hwthread_req = 1; + tpaca->kvm_hstate.kvm_vcpu = NULL; /* * If the thread is already executing in the kernel (e.g. handling @@ -929,7 +928,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu) smp_wmb(); #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP) if (vcpu->arch.ptid) { - kvmppc_grab_hwthread(cpu); xics_wake_cpu(cpu); ++vc->n_woken; } @@ -955,7 +953,8 @@ static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc) /* * Check that we are on thread 0 and that any other threads in - * this core are off-line. + * this core are off-line. Then grab the threads so they can't + * enter the kernel. */ static int on_primary_thread(void) { @@ -967,6 +966,17 @@ static int on_primary_thread(void) while (++thr < threads_per_core) if (cpu_online(cpu + thr)) return 0; + + /* Grab all hw threads so they can't go into the kernel */ + for (thr = 1; thr < threads_per_core; ++thr) { + if (kvmppc_grab_hwthread(cpu + thr)) { + /* Couldn't grab one; let the others go */ + do { + kvmppc_release_hwthread(cpu + thr); + } while (--thr > 0); + return 0; + } + } return 1; } @@ -1014,16 +1024,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) spin_lock(&vc->lock); } - /* - * Make sure we are running on thread 0, and that - * secondary threads are offline. - */ - if (threads_per_core > 1 && !on_primary_thread()) { - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) - vcpu->arch.ret = -EBUSY; - goto out; - } - /* * Assign physical thread IDs, first to non-ceded vcpus * and then to ceded ones. @@ -1043,15 +1043,22 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) if (vcpu->arch.ceded) vcpu->arch.ptid = ptid++; + /* + * Make sure we are running on thread 0, and that + * secondary threads are offline. + */ + if (threads_per_core > 1 && !on_primary_thread()) { + list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) + vcpu->arch.ret = -EBUSY; + goto out; + } + vc->stolen_tb += mftb() - vc->preempt_tb; vc->pcpu = smp_processor_id(); list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); kvmppc_create_dtl_entry(vcpu, vc); } - /* Grab any remaining hw threads so they can't go into the kernel */ - for (i = ptid; i < threads_per_core; ++i) - kvmppc_grab_hwthread(vc->pcpu + i); preempt_disable(); spin_unlock(&vc->lock); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 44b72feaff7d..1e90ef6191a3 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -134,8 +134,11 @@ kvm_start_guest: 27: /* XXX should handle hypervisor maintenance interrupts etc. here */ + /* reload vcpu pointer after clearing the IPI */ + ld r4,HSTATE_KVM_VCPU(r13) + cmpdi r4,0 /* if we have no vcpu to run, go back to sleep */ - beq cr1,kvm_no_guest + beq kvm_no_guest /* were we napping due to cede? */ lbz r0,HSTATE_NAPPING(r13) @@ -1587,6 +1590,10 @@ secondary_too_late: .endr secondary_nap: + /* Clear our vcpu pointer so we don't come back in early */ + li r0, 0 + std r0, HSTATE_KVM_VCPU(r13) + lwsync /* Clear any pending IPI - assume we're a secondary thread */ ld r5, HSTATE_XICS_PHYS(r13) li r7, XICS_XIRR @@ -1612,8 +1619,6 @@ secondary_nap: kvm_no_guest: li r0, KVM_HWTHREAD_IN_NAP stb r0, HSTATE_HWTHREAD_STATE(r13) - li r0, 0 - std r0, HSTATE_KVM_VCPU(r13) li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR -- cgit v1.2.3-59-g8ed1b From 913d3ff9a3c3a13c3115eb4b3265aa35a9e0a7ad Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:16:48 +0000 Subject: KVM: PPC: Book3s HV: Don't access runnable threads list without vcore lock There were a few places where we were traversing the list of runnable threads in a virtual core, i.e. vc->runnable_threads, without holding the vcore spinlock. This extends the places where we hold the vcore spinlock to cover everywhere that we traverse that list. Since we possibly need to sleep inside kvmppc_book3s_hv_page_fault, this moves the call of it from kvmppc_handle_exit out to kvmppc_vcpu_run, where we don't hold the vcore lock. In kvmppc_vcore_blocked, we don't actually need to check whether all vcpus are ceded and don't have any pending exceptions, since the caller has already done that. The caller (kvmppc_run_vcpu) wasn't actually checking for pending exceptions, so we add that. The change of if to while in kvmppc_run_vcpu is to make sure that we never call kvmppc_remove_runnable() when the vcore state is RUNNING or EXITING. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_asm.h | 1 + arch/powerpc/kvm/book3s_hv.c | 67 +++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 34 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 76fdcfef0889..aabcdba8f6b0 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -118,6 +118,7 @@ #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ +#define RESUME_FLAG_ARCH1 (1<<2) #define RESUME_GUEST 0 #define RESUME_GUEST_NV RESUME_FLAG_NV diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 77dec0f8a030..3a737a4bb8bf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -57,6 +57,9 @@ /* #define EXIT_DEBUG_SIMPLE */ /* #define EXIT_DEBUG_INT */ +/* Used to indicate that a guest page fault needs to be handled */ +#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -431,7 +434,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { int r = RESUME_HOST; - int srcu_idx; vcpu->stat.sum_exits++; @@ -491,16 +493,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * have been handled already. */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: - srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmppc_book3s_hv_page_fault(run, vcpu, - vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); - srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + r = RESUME_PAGE_FAULT; break; case BOOK3S_INTERRUPT_H_INST_STORAGE: - srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmppc_book3s_hv_page_fault(run, vcpu, - kvmppc_get_pc(vcpu), 0); - srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + vcpu->arch.fault_dar = kvmppc_get_pc(vcpu); + vcpu->arch.fault_dsisr = 0; + r = RESUME_PAGE_FAULT; break; /* * This occurs if the guest executes an illegal instruction. @@ -984,22 +982,24 @@ static int on_primary_thread(void) * Run a set of guest threads on a physical core. * Called with vc->lock held. */ -static int kvmppc_run_core(struct kvmppc_vcore *vc) +static void kvmppc_run_core(struct kvmppc_vcore *vc) { struct kvm_vcpu *vcpu, *vcpu0, *vnext; long ret; u64 now; int ptid, i, need_vpa_update; int srcu_idx; + struct kvm_vcpu *vcpus_to_update[threads_per_core]; /* don't start if any threads have a signal pending */ need_vpa_update = 0; list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { if (signal_pending(vcpu->arch.run_task)) - return 0; - need_vpa_update |= vcpu->arch.vpa.update_pending | - vcpu->arch.slb_shadow.update_pending | - vcpu->arch.dtl.update_pending; + return; + if (vcpu->arch.vpa.update_pending || + vcpu->arch.slb_shadow.update_pending || + vcpu->arch.dtl.update_pending) + vcpus_to_update[need_vpa_update++] = vcpu; } /* @@ -1019,8 +1019,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) */ if (need_vpa_update) { spin_unlock(&vc->lock); - list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) - kvmppc_update_vpas(vcpu); + for (i = 0; i < need_vpa_update; ++i) + kvmppc_update_vpas(vcpus_to_update[i]); spin_lock(&vc->lock); } @@ -1037,8 +1037,10 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) vcpu->arch.ptid = ptid++; } } - if (!vcpu0) - return 0; /* nothing to run */ + if (!vcpu0) { + vc->vcore_state = VCORE_INACTIVE; + return; /* nothing to run; should never happen */ + } list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) if (vcpu->arch.ceded) vcpu->arch.ptid = ptid++; @@ -1091,6 +1093,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) preempt_enable(); kvm_resched(vcpu); + spin_lock(&vc->lock); now = get_tb(); list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { /* cancel pending dec exception if dec is positive */ @@ -1114,7 +1117,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) } } - spin_lock(&vc->lock); out: vc->vcore_state = VCORE_INACTIVE; vc->preempt_tb = mftb(); @@ -1125,8 +1127,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc) wake_up(&vcpu->arch.cpu_run); } } - - return 1; } /* @@ -1150,20 +1150,11 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state) static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { DEFINE_WAIT(wait); - struct kvm_vcpu *v; - int all_idle = 1; prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); vc->vcore_state = VCORE_SLEEPING; spin_unlock(&vc->lock); - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) { - if (!v->arch.ceded || v->arch.pending_exceptions) { - all_idle = 0; - break; - } - } - if (all_idle) - schedule(); + schedule(); finish_wait(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; @@ -1219,7 +1210,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vc->runner = vcpu; n_ceded = 0; list_for_each_entry(v, &vc->runnable_threads, arch.run_list) - n_ceded += v->arch.ceded; + if (!v->arch.pending_exceptions) + n_ceded += v->arch.ceded; if (n_ceded == vc->n_runnable) kvmppc_vcore_blocked(vc); else @@ -1240,8 +1232,9 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } if (signal_pending(current)) { - if (vc->vcore_state == VCORE_RUNNING || - vc->vcore_state == VCORE_EXITING) { + while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && + (vc->vcore_state == VCORE_RUNNING || + vc->vcore_state == VCORE_EXITING)) { spin_unlock(&vc->lock); kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); spin_lock(&vc->lock); @@ -1261,6 +1254,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; + int srcu_idx; if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -1299,6 +1293,11 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) !(vcpu->arch.shregs.msr & MSR_PR)) { r = kvmppc_pseries_do_hcall(vcpu); kvmppc_core_prepare_to_enter(vcpu); + } else if (r == RESUME_PAGE_FAULT) { + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = kvmppc_book3s_hv_page_fault(run, vcpu, + vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); } } while (r == RESUME_GUEST); -- cgit v1.2.3-59-g8ed1b From 2f12f03436847e063cda8cc4c339ad84961cbf39 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:17:17 +0000 Subject: KVM: PPC: Book3S HV: Fixes for late-joining threads If a thread in a virtual core becomes runnable while other threads in the same virtual core are already running in the guest, it is possible for the latecomer to join the others on the core without first pulling them all out of the guest. Currently this only happens rarely, when a vcpu is first started. This fixes some bugs and omissions in the code in this case. First, we need to check for VPA updates for the latecomer and make a DTL entry for it. Secondly, if it comes along while the master vcpu is doing a VPA update, we don't need to do anything since the master will pick it up in kvmppc_run_core. To handle this correctly we introduce a new vcore state, VCORE_STARTING. Thirdly, there is a race because we currently clear the hardware thread's hwthread_req before waiting to see it get to nap. A latecomer thread could have its hwthread_req cleared before it gets to test it, and therefore never increment the nap_count, leading to messages about wait_for_nap timeouts. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 7 ++++--- arch/powerpc/kvm/book3s_hv.c | 14 +++++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 68f5a308737a..218534d46ae9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -289,9 +289,10 @@ struct kvmppc_vcore { /* Values for vcore_state */ #define VCORE_INACTIVE 0 -#define VCORE_RUNNING 1 -#define VCORE_EXITING 2 -#define VCORE_SLEEPING 3 +#define VCORE_SLEEPING 1 +#define VCORE_STARTING 2 +#define VCORE_RUNNING 3 +#define VCORE_EXITING 4 /* * Struct used to manage memory for a virtual processor area diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3a737a4bb8bf..89995fa6e945 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -336,6 +336,11 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap) static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) { + if (!(vcpu->arch.vpa.update_pending || + vcpu->arch.slb_shadow.update_pending || + vcpu->arch.dtl.update_pending)) + return; + spin_lock(&vcpu->arch.vpa_update_lock); if (vcpu->arch.vpa.update_pending) { kvmppc_update_vpa(vcpu, &vcpu->arch.vpa); @@ -1009,7 +1014,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vc->n_woken = 0; vc->nap_count = 0; vc->entry_exit_count = 0; - vc->vcore_state = VCORE_RUNNING; + vc->vcore_state = VCORE_STARTING; vc->in_guest = 0; vc->napping_threads = 0; @@ -1062,6 +1067,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_create_dtl_entry(vcpu, vc); } + vc->vcore_state = VCORE_RUNNING; preempt_disable(); spin_unlock(&vc->lock); @@ -1070,8 +1076,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu); __kvmppc_vcore_entry(NULL, vcpu0); - for (i = 0; i < threads_per_core; ++i) - kvmppc_release_hwthread(vc->pcpu + i); spin_lock(&vc->lock); /* disable sending of IPIs on virtual external irqs */ @@ -1080,6 +1084,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) /* wait for secondary threads to finish writing their state to memory */ if (vc->nap_count < vc->n_woken) kvmppc_wait_for_nap(vc); + for (i = 0; i < threads_per_core; ++i) + kvmppc_release_hwthread(vc->pcpu + i); /* prevent other vcpu threads from doing kvmppc_start_thread() now */ vc->vcore_state = VCORE_EXITING; spin_unlock(&vc->lock); @@ -1170,6 +1176,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvm_run->exit_reason = 0; vcpu->arch.ret = RESUME_GUEST; vcpu->arch.trap = 0; + kvmppc_update_vpas(vcpu); /* * Synchronize with other threads in this virtual core @@ -1193,6 +1200,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vc->vcore_state == VCORE_RUNNING && VCORE_EXIT_COUNT(vc) == 0) { vcpu->arch.ptid = vc->n_runnable - 1; + kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); } -- cgit v1.2.3-59-g8ed1b From 8455d79e2163997e479931b8d5b7e60a92cd2b86 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:17:42 +0000 Subject: KVM: PPC: Book3S HV: Run virtual core whenever any vcpus in it can run Currently the Book3S HV code implements a policy on multi-threaded processors (i.e. POWER7) that requires all of the active vcpus in a virtual core to be ready to run before we run the virtual core. However, that causes problems on reset, because reset stops all vcpus except vcpu 0, and can also reduce throughput since all four threads in a virtual core have to wait whenever any one of them hits a hypervisor page fault. This relaxes the policy, allowing the virtual core to run as soon as any vcpu in it is runnable. With this, the KVMPPC_VCPU_STOPPED state and the KVMPPC_VCPU_BUSY_IN_HOST state have been combined into a single KVMPPC_VCPU_NOTREADY state, since we no longer need to distinguish between them. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 5 +-- arch/powerpc/kvm/book3s_hv.c | 74 +++++++++++++++++++------------------ 2 files changed, 40 insertions(+), 39 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 218534d46ae9..1e8cbd1299fc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -563,9 +563,8 @@ struct kvm_vcpu_arch { }; /* Values for vcpu->arch.state */ -#define KVMPPC_VCPU_STOPPED 0 -#define KVMPPC_VCPU_BUSY_IN_HOST 1 -#define KVMPPC_VCPU_RUNNABLE 2 +#define KVMPPC_VCPU_NOTREADY 0 +#define KVMPPC_VCPU_RUNNABLE 1 /* Values for vcpu->arch.io_gpr */ #define KVM_MMIO_REG_MASK 0x001f diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 89995fa6e945..61d293465e81 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -776,10 +776,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) kvmppc_mmu_book3s_hv_init(vcpu); - /* - * We consider the vcpu stopped until we see the first run ioctl for it. - */ - vcpu->arch.state = KVMPPC_VCPU_STOPPED; + vcpu->arch.state = KVMPPC_VCPU_NOTREADY; init_waitqueue_head(&vcpu->arch.cpu_run); @@ -866,9 +863,8 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, { if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; - vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + vcpu->arch.state = KVMPPC_VCPU_NOTREADY; --vc->n_runnable; - ++vc->n_busy; list_del(&vcpu->arch.run_list); } @@ -1169,7 +1165,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int n_ceded; - int prev_state; struct kvmppc_vcore *vc; struct kvm_vcpu *v, *vn; @@ -1186,7 +1181,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.ceded = 0; vcpu->arch.run_task = current; vcpu->arch.kvm_run = kvm_run; - prev_state = vcpu->arch.state; vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); ++vc->n_runnable; @@ -1196,35 +1190,26 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) * If the vcore is already running, we may be able to start * this thread straight away and have it join in. */ - if (prev_state == KVMPPC_VCPU_STOPPED) { + if (!signal_pending(current)) { if (vc->vcore_state == VCORE_RUNNING && VCORE_EXIT_COUNT(vc) == 0) { vcpu->arch.ptid = vc->n_runnable - 1; kvmppc_create_dtl_entry(vcpu, vc); kvmppc_start_thread(vcpu); + } else if (vc->vcore_state == VCORE_SLEEPING) { + wake_up(&vc->wq); } - } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST) - --vc->n_busy; + } while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && !signal_pending(current)) { - if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) { + if (vc->vcore_state != VCORE_INACTIVE) { spin_unlock(&vc->lock); kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE); spin_lock(&vc->lock); continue; } - vc->runner = vcpu; - n_ceded = 0; - list_for_each_entry(v, &vc->runnable_threads, arch.run_list) - if (!v->arch.pending_exceptions) - n_ceded += v->arch.ceded; - if (n_ceded == vc->n_runnable) - kvmppc_vcore_blocked(vc); - else - kvmppc_run_core(vc); - list_for_each_entry_safe(v, vn, &vc->runnable_threads, arch.run_list) { kvmppc_core_prepare_to_enter(v); @@ -1236,23 +1221,40 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) wake_up(&v->arch.cpu_run); } } + if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) + break; + vc->runner = vcpu; + n_ceded = 0; + list_for_each_entry(v, &vc->runnable_threads, arch.run_list) + if (!v->arch.pending_exceptions) + n_ceded += v->arch.ceded; + if (n_ceded == vc->n_runnable) + kvmppc_vcore_blocked(vc); + else + kvmppc_run_core(vc); vc->runner = NULL; } - if (signal_pending(current)) { - while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && - (vc->vcore_state == VCORE_RUNNING || - vc->vcore_state == VCORE_EXITING)) { - spin_unlock(&vc->lock); - kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); - spin_lock(&vc->lock); - } - if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { - kvmppc_remove_runnable(vc, vcpu); - vcpu->stat.signal_exits++; - kvm_run->exit_reason = KVM_EXIT_INTR; - vcpu->arch.ret = -EINTR; - } + while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE && + (vc->vcore_state == VCORE_RUNNING || + vc->vcore_state == VCORE_EXITING)) { + spin_unlock(&vc->lock); + kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE); + spin_lock(&vc->lock); + } + + if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) { + kvmppc_remove_runnable(vc, vcpu); + vcpu->stat.signal_exits++; + kvm_run->exit_reason = KVM_EXIT_INTR; + vcpu->arch.ret = -EINTR; + } + + if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) { + /* Wake up some vcpu to run the core */ + v = list_first_entry(&vc->runnable_threads, + struct kvm_vcpu, arch.run_list); + wake_up(&v->arch.cpu_run); } spin_unlock(&vc->lock); -- cgit v1.2.3-59-g8ed1b From c7b676709c163e12ec161c0593c2c76809c25ff4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:18:07 +0000 Subject: KVM: PPC: Book3S HV: Fix accounting of stolen time Currently the code that accounts stolen time tends to overestimate the stolen time, and will sometimes report more stolen time in a DTL (dispatch trace log) entry than has elapsed since the last DTL entry. This can cause guests to underflow the user or system time measured for some tasks, leading to ridiculous CPU percentages and total runtimes being reported by top and other utilities. In addition, the current code was designed for the previous policy where a vcore would only run when all the vcpus in it were runnable, and so only counted stolen time on a per-vcore basis. Now that a vcore can run while some of the vcpus in it are doing other things in the kernel (e.g. handling a page fault), we need to count the time when a vcpu task is preempted while it is not running as part of a vcore as stolen also. To do this, we bring back the BUSY_IN_HOST vcpu state and extend the vcpu_load/put functions to count preemption time while the vcpu is in that state. Handling the transitions between the RUNNING and BUSY_IN_HOST states requires checking and updating two variables (accumulated time stolen and time last preempted), so we add a new spinlock, vcpu->arch.tbacct_lock. This protects both the per-vcpu stolen/preempt-time variables, and the per-vcore variables while this vcpu is running the vcore. Finally, we now don't count time spent in userspace as stolen time. The task could be executing in userspace on behalf of the vcpu, or it could be preempted, or the vcpu could be genuinely stopped. Since we have no way of dividing up the time between these cases, we don't count any of it as stolen. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 5 ++ arch/powerpc/kvm/book3s_hv.c | 127 +++++++++++++++++++++++++++++++----- 2 files changed, 117 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1e8cbd1299fc..3093896015f0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -559,12 +559,17 @@ struct kvm_vcpu_arch { unsigned long dtl_index; u64 stolen_logged; struct kvmppc_vpa slb_shadow; + + spinlock_t tbacct_lock; + u64 busy_stolen; + u64 busy_preempt; #endif }; /* Values for vcpu->arch.state */ #define KVMPPC_VCPU_NOTREADY 0 #define KVMPPC_VCPU_RUNNABLE 1 +#define KVMPPC_VCPU_BUSY_IN_HOST 2 /* Values for vcpu->arch.io_gpr */ #define KVM_MMIO_REG_MASK 0x001f diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 61d293465e81..8b3c470e6cb9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -60,23 +60,74 @@ /* Used to indicate that a guest page fault needs to be handled */ #define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1) +/* Used as a "null" value for timebase values */ +#define TB_NIL (~(u64)0) + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +/* + * We use the vcpu_load/put functions to measure stolen time. + * Stolen time is counted as time when either the vcpu is able to + * run as part of a virtual core, but the task running the vcore + * is preempted or sleeping, or when the vcpu needs something done + * in the kernel by the task running the vcpu, but that task is + * preempted or sleeping. Those two things have to be counted + * separately, since one of the vcpu tasks will take on the job + * of running the core, and the other vcpu tasks in the vcore will + * sleep waiting for it to do that, but that sleep shouldn't count + * as stolen time. + * + * Hence we accumulate stolen time when the vcpu can run as part of + * a vcore using vc->stolen_tb, and the stolen time when the vcpu + * needs its task to do other things in the kernel (for example, + * service a page fault) in busy_stolen. We don't accumulate + * stolen time for a vcore when it is inactive, or for a vcpu + * when it is in state RUNNING or NOTREADY. NOTREADY is a bit of + * a misnomer; it means that the vcpu task is not executing in + * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in + * the kernel. We don't have any way of dividing up that time + * between time that the vcpu is genuinely stopped, time that + * the task is actively working on behalf of the vcpu, and time + * that the task is preempted, so we don't count any of it as + * stolen. + * + * Updates to busy_stolen are protected by arch.tbacct_lock; + * updates to vc->stolen_tb are protected by the arch.tbacct_lock + * of the vcpu that has taken responsibility for running the vcore + * (i.e. vc->runner). The stolen times are measured in units of + * timebase ticks. (Note that the != TB_NIL checks below are + * purely defensive; they should never fail.) + */ + void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; - if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) + spin_lock(&vcpu->arch.tbacct_lock); + if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE && + vc->preempt_tb != TB_NIL) { vc->stolen_tb += mftb() - vc->preempt_tb; + vc->preempt_tb = TB_NIL; + } + if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST && + vcpu->arch.busy_preempt != TB_NIL) { + vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt; + vcpu->arch.busy_preempt = TB_NIL; + } + spin_unlock(&vcpu->arch.tbacct_lock); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { struct kvmppc_vcore *vc = vcpu->arch.vcore; + spin_lock(&vcpu->arch.tbacct_lock); if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) vc->preempt_tb = mftb(); + if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST) + vcpu->arch.busy_preempt = mftb(); + spin_unlock(&vcpu->arch.tbacct_lock); } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) @@ -357,24 +408,61 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->arch.vpa_update_lock); } +/* + * Return the accumulated stolen time for the vcore up until `now'. + * The caller should hold the vcore lock. + */ +static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now) +{ + u64 p; + + /* + * If we are the task running the vcore, then since we hold + * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb + * can't be updated, so we don't need the tbacct_lock. + * If the vcore is inactive, it can't become active (since we + * hold the vcore lock), so the vcpu load/put functions won't + * update stolen_tb/preempt_tb, and we don't need tbacct_lock. + */ + if (vc->vcore_state != VCORE_INACTIVE && + vc->runner->arch.run_task != current) { + spin_lock(&vc->runner->arch.tbacct_lock); + p = vc->stolen_tb; + if (vc->preempt_tb != TB_NIL) + p += now - vc->preempt_tb; + spin_unlock(&vc->runner->arch.tbacct_lock); + } else { + p = vc->stolen_tb; + } + return p; +} + static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc) { struct dtl_entry *dt; struct lppaca *vpa; - unsigned long old_stolen; + unsigned long stolen; + unsigned long core_stolen; + u64 now; dt = vcpu->arch.dtl_ptr; vpa = vcpu->arch.vpa.pinned_addr; - old_stolen = vcpu->arch.stolen_logged; - vcpu->arch.stolen_logged = vc->stolen_tb; + now = mftb(); + core_stolen = vcore_stolen_time(vc, now); + stolen = core_stolen - vcpu->arch.stolen_logged; + vcpu->arch.stolen_logged = core_stolen; + spin_lock(&vcpu->arch.tbacct_lock); + stolen += vcpu->arch.busy_stolen; + vcpu->arch.busy_stolen = 0; + spin_unlock(&vcpu->arch.tbacct_lock); if (!dt || !vpa) return; memset(dt, 0, sizeof(struct dtl_entry)); dt->dispatch_reason = 7; dt->processor_id = vc->pcpu + vcpu->arch.ptid; - dt->timebase = mftb(); - dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen; + dt->timebase = now; + dt->enqueue_to_dispatch_time = stolen; dt->srr0 = kvmppc_get_pc(vcpu); dt->srr1 = vcpu->arch.shregs.msr; ++dt; @@ -773,6 +861,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcpu->arch.pvr = mfspr(SPRN_PVR); kvmppc_set_pvr(vcpu, vcpu->arch.pvr); spin_lock_init(&vcpu->arch.vpa_update_lock); + spin_lock_init(&vcpu->arch.tbacct_lock); + vcpu->arch.busy_preempt = TB_NIL; kvmppc_mmu_book3s_hv_init(vcpu); @@ -788,7 +878,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); init_waitqueue_head(&vcore->wq); - vcore->preempt_tb = mftb(); + vcore->preempt_tb = TB_NIL; } kvm->arch.vcores[core] = vcore; } @@ -801,7 +891,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) ++vcore->num_threads; spin_unlock(&vcore->lock); vcpu->arch.vcore = vcore; - vcpu->arch.stolen_logged = vcore->stolen_tb; vcpu->arch.cpu_type = KVM_CPU_3S_64; kvmppc_sanity_check(vcpu); @@ -861,9 +950,17 @@ extern void xics_wake_cpu(int cpu); static void kvmppc_remove_runnable(struct kvmppc_vcore *vc, struct kvm_vcpu *vcpu) { + u64 now; + if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE) return; - vcpu->arch.state = KVMPPC_VCPU_NOTREADY; + spin_lock(&vcpu->arch.tbacct_lock); + now = mftb(); + vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) - + vcpu->arch.stolen_logged; + vcpu->arch.busy_preempt = now; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; + spin_unlock(&vcpu->arch.tbacct_lock); --vc->n_runnable; list_del(&vcpu->arch.run_list); } @@ -1038,10 +1135,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) vcpu->arch.ptid = ptid++; } } - if (!vcpu0) { - vc->vcore_state = VCORE_INACTIVE; - return; /* nothing to run; should never happen */ - } + if (!vcpu0) + goto out; /* nothing to run; should never happen */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) if (vcpu->arch.ceded) vcpu->arch.ptid = ptid++; @@ -1056,7 +1151,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) goto out; } - vc->stolen_tb += mftb() - vc->preempt_tb; vc->pcpu = smp_processor_id(); list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { kvmppc_start_thread(vcpu); @@ -1121,7 +1215,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) out: vc->vcore_state = VCORE_INACTIVE; - vc->preempt_tb = mftb(); list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads, arch.run_list) { if (vcpu->arch.ret != RESUME_GUEST) { @@ -1181,7 +1274,9 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) vcpu->arch.ceded = 0; vcpu->arch.run_task = current; vcpu->arch.kvm_run = kvm_run; + vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb()); vcpu->arch.state = KVMPPC_VCPU_RUNNABLE; + vcpu->arch.busy_preempt = TB_NIL; list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads); ++vc->n_runnable; @@ -1295,6 +1390,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) flush_vsx_to_thread(current); vcpu->arch.wqp = &vcpu->arch.vcore->wq; vcpu->arch.pgdir = current->mm->pgd; + vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; do { r = kvmppc_run_vcpu(run, vcpu); @@ -1312,6 +1408,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) } while (r == RESUME_GUEST); out: + vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&vcpu->kvm->arch.vcpus_running); return r; } -- cgit v1.2.3-59-g8ed1b From 9f8c8c7812976fbfaf4bb30aaf8f6b001864f20a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:18:37 +0000 Subject: KVM: PPC: Book3S HV: Allow DTL to be set to address 0, length 0 Commit 55b665b026 ("KVM: PPC: Book3S HV: Provide a way for userspace to get/set per-vCPU areas") includes a check on the length of the dispatch trace log (DTL) to make sure the buffer is at least one entry long. This is appropriate when registering a buffer, but the interface also allows for any existing buffer to be unregistered by specifying a zero address. In this case the length check is not appropriate. This makes the check conditional on the address being non-zero. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 8b3c470e6cb9..812764c96229 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -811,9 +811,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) addr = val->vpaval.addr; len = val->vpaval.length; r = -EINVAL; - if (len < sizeof(struct dtl_entry)) - break; - if (addr && !vcpu->arch.vpa.next_gpa) + if (addr && (len < sizeof(struct dtl_entry) || + !vcpu->arch.vpa.next_gpa)) break; len -= len % sizeof(struct dtl_entry); r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len); -- cgit v1.2.3-59-g8ed1b From 8b5869ad85f703ffeb25e656eab826f6b85b984c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 15 Oct 2012 01:20:50 +0000 Subject: KVM: PPC: Book3S HV: Fix thinko in try_lock_hpte() This fixes an error in the inline asm in try_lock_hpte() where we were erroneously using a register number as an immediate operand. The bug only affects an error path, and in fact the code will still work as long as the compiler chooses some register other than r0 for the "bits" variable. Nevertheless it should still be fixed. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 0dd1d86d3e31..1472a5b4e4e3 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -60,7 +60,7 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) " ori %0,%0,%4\n" " stdcx. %0,0,%2\n" " beq+ 2f\n" - " li %1,%3\n" + " mr %1,%3\n" "2: isync" : "=&r" (tmp), "=&r" (old) : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) -- cgit v1.2.3-59-g8ed1b From 63a1909190a3baa0abb9463ef28c8d8c969be951 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 31 Oct 2012 13:37:59 +0100 Subject: PPC: ePAPR: Convert hcall header to uapi (round 2) The new uapi framework splits kernel internal and user space exported bits of header files more cleanly. Adjust the ePAPR header accordingly. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/epapr_hcalls.h | 458 +++++++++++++++++++++++++ arch/powerpc/include/uapi/asm/epapr_hcalls.h | 478 ++++----------------------- 2 files changed, 517 insertions(+), 419 deletions(-) create mode 100644 arch/powerpc/include/asm/epapr_hcalls.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h new file mode 100644 index 000000000000..d3d634274d2c --- /dev/null +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -0,0 +1,458 @@ +/* + * ePAPR hcall interface + * + * Copyright 2008-2011 Freescale Semiconductor, Inc. + * + * Author: Timur Tabi + * + * This file is provided under a dual BSD/GPL license. When using or + * redistributing this file, you may do so under either license. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* A "hypercall" is an "sc 1" instruction. This header file file provides C + * wrapper functions for the ePAPR hypervisor interface. It is inteded + * for use by Linux device drivers and other operating systems. + * + * The hypercalls are implemented as inline assembly, rather than assembly + * language functions in a .S file, for optimization. It allows + * the caller to issue the hypercall instruction directly, improving both + * performance and memory footprint. + */ + +#ifndef _EPAPR_HCALLS_H +#define _EPAPR_HCALLS_H + +#include + +#ifndef __ASSEMBLY__ +#include +#include +#include + +/* + * Hypercall register clobber list + * + * These macros are used to define the list of clobbered registers during a + * hypercall. Technically, registers r0 and r3-r12 are always clobbered, + * but the gcc inline assembly syntax does not allow us to specify registers + * on the clobber list that are also on the input/output list. Therefore, + * the lists of clobbered registers depends on the number of register + * parmeters ("+r" and "=r") passed to the hypercall. + * + * Each assembly block should use one of the HCALL_CLOBBERSx macros. As a + * general rule, 'x' is the number of parameters passed to the assembly + * block *except* for r11. + * + * If you're not sure, just use the smallest value of 'x' that does not + * generate a compilation error. Because these are static inline functions, + * the compiler will only check the clobber list for a function if you + * compile code that calls that function. + * + * r3 and r11 are not included in any clobbers list because they are always + * listed as output registers. + * + * XER, CTR, and LR are currently listed as clobbers because it's uncertain + * whether they will be clobbered. + * + * Note that r11 can be used as an output parameter. + * + * The "memory" clobber is only necessary for hcalls where the Hypervisor + * will read or write guest memory. However, we add it to all hcalls because + * the impact is minimal, and we want to ensure that it's present for the + * hcalls that need it. +*/ + +/* List of common clobbered registers. Do not use this macro. */ +#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory" + +#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS +#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10" +#define EV_HCALL_CLOBBERS6 EV_HCALL_CLOBBERS7, "r9" +#define EV_HCALL_CLOBBERS5 EV_HCALL_CLOBBERS6, "r8" +#define EV_HCALL_CLOBBERS4 EV_HCALL_CLOBBERS5, "r7" +#define EV_HCALL_CLOBBERS3 EV_HCALL_CLOBBERS4, "r6" +#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5" +#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4" + +extern bool epapr_paravirt_enabled; +extern u32 epapr_hypercall_start[]; + +/* + * We use "uintptr_t" to define a register because it's guaranteed to be a + * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit + * platform. + * + * All registers are either input/output or output only. Registers that are + * initialized before making the hypercall are input/output. All + * input/output registers are represented with "+r". Output-only registers + * are represented with "=r". Do not specify any unused registers. The + * clobber list will tell the compiler that the hypercall modifies those + * registers, which is good enough. + */ + +/** + * ev_int_set_config - configure the specified interrupt + * @interrupt: the interrupt number + * @config: configuration for this interrupt + * @priority: interrupt priority + * @destination: destination CPU number + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_set_config(unsigned int interrupt, + uint32_t config, unsigned int priority, uint32_t destination) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + + r11 = EV_HCALL_TOKEN(EV_INT_SET_CONFIG); + r3 = interrupt; + r4 = config; + r5 = priority; + r6 = destination; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6) + : : EV_HCALL_CLOBBERS4 + ); + + return r3; +} + +/** + * ev_int_get_config - return the config of the specified interrupt + * @interrupt: the interrupt number + * @config: returned configuration for this interrupt + * @priority: returned interrupt priority + * @destination: returned destination CPU number + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_get_config(unsigned int interrupt, + uint32_t *config, unsigned int *priority, uint32_t *destination) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + + r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG); + r3 = interrupt; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6) + : : EV_HCALL_CLOBBERS4 + ); + + *config = r4; + *priority = r5; + *destination = r6; + + return r3; +} + +/** + * ev_int_set_mask - sets the mask for the specified interrupt source + * @interrupt: the interrupt number + * @mask: 0=enable interrupts, 1=disable interrupts + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_set_mask(unsigned int interrupt, + unsigned int mask) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + + r11 = EV_HCALL_TOKEN(EV_INT_SET_MASK); + r3 = interrupt; + r4 = mask; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "+r" (r4) + : : EV_HCALL_CLOBBERS2 + ); + + return r3; +} + +/** + * ev_int_get_mask - returns the mask for the specified interrupt source + * @interrupt: the interrupt number + * @mask: returned mask for this interrupt (0=enabled, 1=disabled) + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_get_mask(unsigned int interrupt, + unsigned int *mask) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + + r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK); + r3 = interrupt; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4) + : : EV_HCALL_CLOBBERS2 + ); + + *mask = r4; + + return r3; +} + +/** + * ev_int_eoi - signal the end of interrupt processing + * @interrupt: the interrupt number + * + * This function signals the end of processing for the the specified + * interrupt, which must be the interrupt currently in service. By + * definition, this is also the highest-priority interrupt. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_eoi(unsigned int interrupt) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + + r11 = EV_HCALL_TOKEN(EV_INT_EOI); + r3 = interrupt; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3) + : : EV_HCALL_CLOBBERS1 + ); + + return r3; +} + +/** + * ev_byte_channel_send - send characters to a byte stream + * @handle: byte stream handle + * @count: (input) num of chars to send, (output) num chars sent + * @buffer: pointer to a 16-byte buffer + * + * @buffer must be at least 16 bytes long, because all 16 bytes will be + * read from memory into registers, even if count < 16. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_byte_channel_send(unsigned int handle, + unsigned int *count, const char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + register uintptr_t r7 __asm__("r7"); + register uintptr_t r8 __asm__("r8"); + const uint32_t *p = (const uint32_t *) buffer; + + r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_SEND); + r3 = handle; + r4 = *count; + r5 = be32_to_cpu(p[0]); + r6 = be32_to_cpu(p[1]); + r7 = be32_to_cpu(p[2]); + r8 = be32_to_cpu(p[3]); + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), + "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8) + : : EV_HCALL_CLOBBERS6 + ); + + *count = r4; + + return r3; +} + +/** + * ev_byte_channel_receive - fetch characters from a byte channel + * @handle: byte channel handle + * @count: (input) max num of chars to receive, (output) num chars received + * @buffer: pointer to a 16-byte buffer + * + * The size of @buffer must be at least 16 bytes, even if you request fewer + * than 16 characters, because we always write 16 bytes to @buffer. This is + * for performance reasons. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_byte_channel_receive(unsigned int handle, + unsigned int *count, char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + register uintptr_t r6 __asm__("r6"); + register uintptr_t r7 __asm__("r7"); + register uintptr_t r8 __asm__("r8"); + uint32_t *p = (uint32_t *) buffer; + + r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_RECEIVE); + r3 = handle; + r4 = *count; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "+r" (r4), + "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8) + : : EV_HCALL_CLOBBERS6 + ); + + *count = r4; + p[0] = cpu_to_be32(r5); + p[1] = cpu_to_be32(r6); + p[2] = cpu_to_be32(r7); + p[3] = cpu_to_be32(r8); + + return r3; +} + +/** + * ev_byte_channel_poll - returns the status of the byte channel buffers + * @handle: byte channel handle + * @rx_count: returned count of bytes in receive queue + * @tx_count: returned count of free space in transmit queue + * + * This function reports the amount of data in the receive queue (i.e. the + * number of bytes you can read), and the amount of free space in the transmit + * queue (i.e. the number of bytes you can write). + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_byte_channel_poll(unsigned int handle, + unsigned int *rx_count, unsigned int *tx_count) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + register uintptr_t r5 __asm__("r5"); + + r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL); + r3 = handle; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5) + : : EV_HCALL_CLOBBERS3 + ); + + *rx_count = r4; + *tx_count = r5; + + return r3; +} + +/** + * ev_int_iack - acknowledge an interrupt + * @handle: handle to the target interrupt controller + * @vector: returned interrupt vector + * + * If handle is zero, the function returns the next interrupt source + * number to be handled irrespective of the hierarchy or cascading + * of interrupt controllers. If non-zero, specifies a handle to the + * interrupt controller that is the target of the acknowledge. + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_int_iack(unsigned int handle, + unsigned int *vector) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + register uintptr_t r4 __asm__("r4"); + + r11 = EV_HCALL_TOKEN(EV_INT_IACK); + r3 = handle; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3), "=r" (r4) + : : EV_HCALL_CLOBBERS2 + ); + + *vector = r4; + + return r3; +} + +/** + * ev_doorbell_send - send a doorbell to another partition + * @handle: doorbell send handle + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_doorbell_send(unsigned int handle) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + + r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND); + r3 = handle; + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "+r" (r3) + : : EV_HCALL_CLOBBERS1 + ); + + return r3; +} + +/** + * ev_idle -- wait for next interrupt on this core + * + * Returns 0 for success, or an error code. + */ +static inline unsigned int ev_idle(void) +{ + register uintptr_t r11 __asm__("r11"); + register uintptr_t r3 __asm__("r3"); + + r11 = EV_HCALL_TOKEN(EV_IDLE); + + asm volatile("bl epapr_hypercall_start" + : "+r" (r11), "=r" (r3) + : : EV_HCALL_CLOBBERS1 + ); + + return r3; +} +#endif /* !__ASSEMBLY__ */ +#endif /* _EPAPR_HCALLS_H */ diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h index 58997afcd085..7f9c74b46704 100644 --- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h +++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h @@ -37,422 +37,62 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* A "hypercall" is an "sc 1" instruction. This header file file provides C - * wrapper functions for the ePAPR hypervisor interface. It is inteded - * for use by Linux device drivers and other operating systems. - * - * The hypercalls are implemented as inline assembly, rather than assembly - * language functions in a .S file, for optimization. It allows - * the caller to issue the hypercall instruction directly, improving both - * performance and memory footprint. - */ - -#ifndef _EPAPR_HCALLS_H -#define _EPAPR_HCALLS_H - -#include - -#ifndef __ASSEMBLY__ -#include -#include -#include - -/* - * Hypercall register clobber list - * - * These macros are used to define the list of clobbered registers during a - * hypercall. Technically, registers r0 and r3-r12 are always clobbered, - * but the gcc inline assembly syntax does not allow us to specify registers - * on the clobber list that are also on the input/output list. Therefore, - * the lists of clobbered registers depends on the number of register - * parmeters ("+r" and "=r") passed to the hypercall. - * - * Each assembly block should use one of the HCALL_CLOBBERSx macros. As a - * general rule, 'x' is the number of parameters passed to the assembly - * block *except* for r11. - * - * If you're not sure, just use the smallest value of 'x' that does not - * generate a compilation error. Because these are static inline functions, - * the compiler will only check the clobber list for a function if you - * compile code that calls that function. - * - * r3 and r11 are not included in any clobbers list because they are always - * listed as output registers. - * - * XER, CTR, and LR are currently listed as clobbers because it's uncertain - * whether they will be clobbered. - * - * Note that r11 can be used as an output parameter. - * - * The "memory" clobber is only necessary for hcalls where the Hypervisor - * will read or write guest memory. However, we add it to all hcalls because - * the impact is minimal, and we want to ensure that it's present for the - * hcalls that need it. -*/ - -/* List of common clobbered registers. Do not use this macro. */ -#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory" - -#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS -#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10" -#define EV_HCALL_CLOBBERS6 EV_HCALL_CLOBBERS7, "r9" -#define EV_HCALL_CLOBBERS5 EV_HCALL_CLOBBERS6, "r8" -#define EV_HCALL_CLOBBERS4 EV_HCALL_CLOBBERS5, "r7" -#define EV_HCALL_CLOBBERS3 EV_HCALL_CLOBBERS4, "r6" -#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5" -#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4" - -extern bool epapr_paravirt_enabled; -extern u32 epapr_hypercall_start[]; - -/* - * We use "uintptr_t" to define a register because it's guaranteed to be a - * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit - * platform. - * - * All registers are either input/output or output only. Registers that are - * initialized before making the hypercall are input/output. All - * input/output registers are represented with "+r". Output-only registers - * are represented with "=r". Do not specify any unused registers. The - * clobber list will tell the compiler that the hypercall modifies those - * registers, which is good enough. - */ - -/** - * ev_int_set_config - configure the specified interrupt - * @interrupt: the interrupt number - * @config: configuration for this interrupt - * @priority: interrupt priority - * @destination: destination CPU number - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_set_config(unsigned int interrupt, - uint32_t config, unsigned int priority, uint32_t destination) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - - r11 = EV_HCALL_TOKEN(EV_INT_SET_CONFIG); - r3 = interrupt; - r4 = config; - r5 = priority; - r6 = destination; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6) - : : EV_HCALL_CLOBBERS4 - ); - - return r3; -} - -/** - * ev_int_get_config - return the config of the specified interrupt - * @interrupt: the interrupt number - * @config: returned configuration for this interrupt - * @priority: returned interrupt priority - * @destination: returned destination CPU number - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_get_config(unsigned int interrupt, - uint32_t *config, unsigned int *priority, uint32_t *destination) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - - r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG); - r3 = interrupt; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6) - : : EV_HCALL_CLOBBERS4 - ); - - *config = r4; - *priority = r5; - *destination = r6; - - return r3; -} - -/** - * ev_int_set_mask - sets the mask for the specified interrupt source - * @interrupt: the interrupt number - * @mask: 0=enable interrupts, 1=disable interrupts - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_set_mask(unsigned int interrupt, - unsigned int mask) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - - r11 = EV_HCALL_TOKEN(EV_INT_SET_MASK); - r3 = interrupt; - r4 = mask; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "+r" (r4) - : : EV_HCALL_CLOBBERS2 - ); - - return r3; -} - -/** - * ev_int_get_mask - returns the mask for the specified interrupt source - * @interrupt: the interrupt number - * @mask: returned mask for this interrupt (0=enabled, 1=disabled) - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_get_mask(unsigned int interrupt, - unsigned int *mask) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - - r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK); - r3 = interrupt; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4) - : : EV_HCALL_CLOBBERS2 - ); - - *mask = r4; - - return r3; -} - -/** - * ev_int_eoi - signal the end of interrupt processing - * @interrupt: the interrupt number - * - * This function signals the end of processing for the the specified - * interrupt, which must be the interrupt currently in service. By - * definition, this is also the highest-priority interrupt. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_eoi(unsigned int interrupt) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - - r11 = EV_HCALL_TOKEN(EV_INT_EOI); - r3 = interrupt; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3) - : : EV_HCALL_CLOBBERS1 - ); - - return r3; -} - -/** - * ev_byte_channel_send - send characters to a byte stream - * @handle: byte stream handle - * @count: (input) num of chars to send, (output) num chars sent - * @buffer: pointer to a 16-byte buffer - * - * @buffer must be at least 16 bytes long, because all 16 bytes will be - * read from memory into registers, even if count < 16. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_byte_channel_send(unsigned int handle, - unsigned int *count, const char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - register uintptr_t r7 __asm__("r7"); - register uintptr_t r8 __asm__("r8"); - const uint32_t *p = (const uint32_t *) buffer; - - r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_SEND); - r3 = handle; - r4 = *count; - r5 = be32_to_cpu(p[0]); - r6 = be32_to_cpu(p[1]); - r7 = be32_to_cpu(p[2]); - r8 = be32_to_cpu(p[3]); - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), - "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8) - : : EV_HCALL_CLOBBERS6 - ); - - *count = r4; - - return r3; -} - -/** - * ev_byte_channel_receive - fetch characters from a byte channel - * @handle: byte channel handle - * @count: (input) max num of chars to receive, (output) num chars received - * @buffer: pointer to a 16-byte buffer - * - * The size of @buffer must be at least 16 bytes, even if you request fewer - * than 16 characters, because we always write 16 bytes to @buffer. This is - * for performance reasons. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_byte_channel_receive(unsigned int handle, - unsigned int *count, char buffer[EV_BYTE_CHANNEL_MAX_BYTES]) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - register uintptr_t r6 __asm__("r6"); - register uintptr_t r7 __asm__("r7"); - register uintptr_t r8 __asm__("r8"); - uint32_t *p = (uint32_t *) buffer; - - r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_RECEIVE); - r3 = handle; - r4 = *count; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "+r" (r4), - "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8) - : : EV_HCALL_CLOBBERS6 - ); - - *count = r4; - p[0] = cpu_to_be32(r5); - p[1] = cpu_to_be32(r6); - p[2] = cpu_to_be32(r7); - p[3] = cpu_to_be32(r8); - - return r3; -} - -/** - * ev_byte_channel_poll - returns the status of the byte channel buffers - * @handle: byte channel handle - * @rx_count: returned count of bytes in receive queue - * @tx_count: returned count of free space in transmit queue - * - * This function reports the amount of data in the receive queue (i.e. the - * number of bytes you can read), and the amount of free space in the transmit - * queue (i.e. the number of bytes you can write). - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_byte_channel_poll(unsigned int handle, - unsigned int *rx_count, unsigned int *tx_count) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - register uintptr_t r5 __asm__("r5"); - - r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL); - r3 = handle; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5) - : : EV_HCALL_CLOBBERS3 - ); - - *rx_count = r4; - *tx_count = r5; - - return r3; -} - -/** - * ev_int_iack - acknowledge an interrupt - * @handle: handle to the target interrupt controller - * @vector: returned interrupt vector - * - * If handle is zero, the function returns the next interrupt source - * number to be handled irrespective of the hierarchy or cascading - * of interrupt controllers. If non-zero, specifies a handle to the - * interrupt controller that is the target of the acknowledge. - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_int_iack(unsigned int handle, - unsigned int *vector) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - register uintptr_t r4 __asm__("r4"); - - r11 = EV_HCALL_TOKEN(EV_INT_IACK); - r3 = handle; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3), "=r" (r4) - : : EV_HCALL_CLOBBERS2 - ); - - *vector = r4; - - return r3; -} - -/** - * ev_doorbell_send - send a doorbell to another partition - * @handle: doorbell send handle - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_doorbell_send(unsigned int handle) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - - r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND); - r3 = handle; - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "+r" (r3) - : : EV_HCALL_CLOBBERS1 - ); - - return r3; -} - -/** - * ev_idle -- wait for next interrupt on this core - * - * Returns 0 for success, or an error code. - */ -static inline unsigned int ev_idle(void) -{ - register uintptr_t r11 __asm__("r11"); - register uintptr_t r3 __asm__("r3"); - - r11 = EV_HCALL_TOKEN(EV_IDLE); - - asm volatile("bl epapr_hypercall_start" - : "+r" (r11), "=r" (r3) - : : EV_HCALL_CLOBBERS1 - ); - - return r3; -} -#endif /* !__ASSEMBLY__ */ -#endif +#ifndef _UAPI_ASM_POWERPC_EPAPR_HCALLS_H +#define _UAPI_ASM_POWERPC_EPAPR_HCALLS_H + +#define EV_BYTE_CHANNEL_SEND 1 +#define EV_BYTE_CHANNEL_RECEIVE 2 +#define EV_BYTE_CHANNEL_POLL 3 +#define EV_INT_SET_CONFIG 4 +#define EV_INT_GET_CONFIG 5 +#define EV_INT_SET_MASK 6 +#define EV_INT_GET_MASK 7 +#define EV_INT_IACK 9 +#define EV_INT_EOI 10 +#define EV_INT_SEND_IPI 11 +#define EV_INT_SET_TASK_PRIORITY 12 +#define EV_INT_GET_TASK_PRIORITY 13 +#define EV_DOORBELL_SEND 14 +#define EV_MSGSND 15 +#define EV_IDLE 16 + +/* vendor ID: epapr */ +#define EV_LOCAL_VENDOR_ID 0 /* for private use */ +#define EV_EPAPR_VENDOR_ID 1 +#define EV_FSL_VENDOR_ID 2 /* Freescale Semiconductor */ +#define EV_IBM_VENDOR_ID 3 /* IBM */ +#define EV_GHS_VENDOR_ID 4 /* Green Hills Software */ +#define EV_ENEA_VENDOR_ID 5 /* Enea */ +#define EV_WR_VENDOR_ID 6 /* Wind River Systems */ +#define EV_AMCC_VENDOR_ID 7 /* Applied Micro Circuits */ +#define EV_KVM_VENDOR_ID 42 /* KVM */ + +/* The max number of bytes that a byte channel can send or receive per call */ +#define EV_BYTE_CHANNEL_MAX_BYTES 16 + + +#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num)) +#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num) + +/* epapr return codes */ +#define EV_SUCCESS 0 +#define EV_EPERM 1 /* Operation not permitted */ +#define EV_ENOENT 2 /* Entry Not Found */ +#define EV_EIO 3 /* I/O error occured */ +#define EV_EAGAIN 4 /* The operation had insufficient + * resources to complete and should be + * retried + */ +#define EV_ENOMEM 5 /* There was insufficient memory to + * complete the operation */ +#define EV_EFAULT 6 /* Bad guest address */ +#define EV_ENODEV 7 /* No such device */ +#define EV_EINVAL 8 /* An argument supplied to the hcall + was out of range or invalid */ +#define EV_INTERNAL 9 /* An internal error occured */ +#define EV_CONFIG 10 /* A configuration error was detected */ +#define EV_INVALID_STATE 11 /* The object is in an invalid state */ +#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */ +#define EV_BUFFER_OVERFLOW 13 /* Caller-supplied buffer too small */ + +#endif /* _UAPI_ASM_POWERPC_EPAPR_HCALLS_H */ -- cgit v1.2.3-59-g8ed1b From 42897d866b120547777ae1fd316680ec53356d9c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 Nov 2012 23:29:02 -0200 Subject: KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC initialization TSC initialization will soon make use of online_vcpus. Signed-off-by: Marcelo Tosatti --- arch/ia64/kvm/kvm-ia64.c | 5 +++++ arch/powerpc/kvm/powerpc.c | 5 +++++ arch/s390/kvm/kvm-s390.c | 5 +++++ arch/x86/kvm/svm.c | 1 - arch/x86/kvm/vmx.c | 2 -- arch/x86/kvm/x86.c | 13 +++++++++++++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 1 + 8 files changed, 30 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index c71acd7f9a13..83b54530916e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1330,6 +1330,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { return -EINVAL; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index deb0d596d815..f9ab12aea829 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -439,6 +439,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) return vcpu; } +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { /* Make sure we're not using the vcpu anymore */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 38883f0bf27e..731ddeee32e4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -355,6 +355,11 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); } +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + return 0; +} + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 94f5ceba7e1e..161a5fa66d82 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1251,7 +1251,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; init_vmcb(svm); - kvm_write_tsc(&svm->vcpu, 0); err = fx_init(&svm->vcpu); if (err) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 896efd4842e7..bb18923bf632 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3896,8 +3896,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); set_cr4_guest_host_mask(vmx); - kvm_write_tsc(&vmx->vcpu, 0); - return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a7b97a49d8ad..f3c069efc72a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6320,6 +6320,19 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return r; } +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ + int r; + + r = vcpu_load(vcpu); + if (r) + return r; + kvm_write_tsc(vcpu, 0); + vcpu_put(vcpu); + + return r; +} + void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c94c9985dee0..8e5c7b651655 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -596,6 +596,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); +int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_hardware_enable(void *garbage); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index be3e7bb73b10..e6cfd4344d28 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1853,6 +1853,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) atomic_inc(&kvm->online_vcpus); mutex_unlock(&kvm->lock); + kvm_arch_vcpu_postcreate(vcpu); return r; unlock_vcpu_destroy: -- cgit v1.2.3-59-g8ed1b From 0e673fb679027600cad45bd61a4cc9ebd2ed2bb1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 9 Oct 2012 00:06:20 +0200 Subject: KVM: PPC: Support eventfd In order to support the generic eventfd infrastructure on PPC, we need to call into the generic KVM in-kernel device mmio code. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/Kconfig | 1 + arch/powerpc/kvm/Makefile | 4 +++- arch/powerpc/kvm/powerpc.c | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 71f0cd9edf33..4730c953f435 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -20,6 +20,7 @@ config KVM bool select PREEMPT_NOTIFIERS select ANON_INODES + select HAVE_KVM_EVENTFD config KVM_BOOK3S_HANDLER bool diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index c2a08636e6d4..cd8965828676 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm -common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ + eventfd.o) CFLAGS_44x_tlb.o := -I. CFLAGS_e500_tlb.o := -I. @@ -76,6 +77,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ kvm-book3s_64-module-objs := \ ../../../virt/kvm/kvm_main.o \ + ../../../virt/kvm/eventfd.o \ powerpc.o \ emulate.o \ book3s.o \ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f9ab12aea829..d583ea15e151 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -314,6 +314,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: case KVM_CAP_ONE_REG: + case KVM_CAP_IOEVENTFD: r = 1; break; #ifndef CONFIG_KVM_BOOK3S_64_HV @@ -618,6 +619,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->mmio_is_write = 0; vcpu->arch.mmio_sign_extend = 0; + if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + bytes, &run->mmio.data)) { + kvmppc_complete_mmio_load(vcpu, run); + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + return EMULATE_DO_MMIO; } @@ -627,8 +635,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, { int r; - r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); vcpu->arch.mmio_sign_extend = 1; + r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); return r; } @@ -666,6 +674,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } } + if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + bytes, &run->mmio.data)) { + kvmppc_complete_mmio_load(vcpu, run); + vcpu->mmio_needed = 0; + return EMULATE_DONE; + } + return EMULATE_DO_MMIO; } -- cgit v1.2.3-59-g8ed1b From 7ed661bf852cefa1ab57ad709a675bfb029d47ab Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 13 Nov 2012 18:31:32 +0000 Subject: KVM: PPC: Book3S HV: Restructure HPT entry creation code This restructures the code that creates HPT (hashed page table) entries so that it can be called in situations where we don't have a struct vcpu pointer, only a struct kvm pointer. It also fixes a bug where kvmppc_map_vrma() would corrupt the guest R4 value. Most of the work of kvmppc_virtmode_h_enter is now done by a new function, kvmppc_virtmode_do_h_enter, which itself calls another new function, kvmppc_do_h_enter, which contains most of the old kvmppc_h_enter. The new kvmppc_do_h_enter takes explicit arguments for the place to return the HPTE index, the Linux page tables to use, and whether it is being called in real mode, thus removing the need for it to have the vcpu as an argument. Currently kvmppc_map_vrma creates the VRMA (virtual real mode area) HPTEs by calling kvmppc_virtmode_h_enter, which is designed primarily to handle H_ENTER hcalls from the guest that need to pin a page of memory. Since H_ENTER returns the index of the created HPTE in R4, kvmppc_virtmode_h_enter updates the guest R4, corrupting the guest R4 in the case when it gets called from kvmppc_map_vrma on the first VCPU_RUN ioctl. With this, kvmppc_map_vrma instead calls kvmppc_virtmode_do_h_enter with the address of a dummy word as the place to store the HPTE index, thus avoiding corrupting the guest R4. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 5 +++-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 36 ++++++++++++++++++++++++----------- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 27 ++++++++++++++++---------- 3 files changed, 45 insertions(+), 23 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 36fcf4190461..fea768f21cd7 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -157,8 +157,9 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr); extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel); -extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel); +extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel, + pgd_t *pgdir, bool realmode, unsigned long *idx_ret); extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2a89a36e7263..6ee6516a0bee 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -41,6 +41,10 @@ /* Power architecture requires HPT is at least 256kB */ #define PPC_MIN_HPT_ORDER 18 +static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, + long pte_index, unsigned long pteh, + unsigned long ptel, unsigned long *pte_idx_ret); + long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) { unsigned long hpt; @@ -185,6 +189,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long addr, hash; unsigned long psize; unsigned long hp0, hp1; + unsigned long idx_ret; long ret; struct kvm *kvm = vcpu->kvm; @@ -216,7 +221,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, hash = (hash << 3) + 7; hp_v = hp0 | ((addr >> 16) & ~0x7fUL); hp_r = hp1 | addr; - ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); + ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r, + &idx_ret); if (ret != H_SUCCESS) { pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", addr, ret); @@ -354,15 +360,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, return err; } -/* - * We come here on a H_ENTER call from the guest when we are not - * using mmu notifiers and we don't have the requested page pinned - * already. - */ -long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel) +long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, + long pte_index, unsigned long pteh, + unsigned long ptel, unsigned long *pte_idx_ret) { - struct kvm *kvm = vcpu->kvm; unsigned long psize, gpa, gfn; struct kvm_memory_slot *memslot; long ret; @@ -390,8 +391,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, do_insert: /* Protect linux PTE lookup from page table destruction */ rcu_read_lock_sched(); /* this disables preemption too */ - vcpu->arch.pgdir = current->mm->pgd; - ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); + ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, + current->mm->pgd, false, pte_idx_ret); rcu_read_unlock_sched(); if (ret == H_TOO_HARD) { /* this can't happen */ @@ -402,6 +403,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, } +/* + * We come here on a H_ENTER call from the guest when we are not + * using mmu notifiers and we don't have the requested page pinned + * already. + */ +long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, + unsigned long ptel) +{ + return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index, + pteh, ptel, &vcpu->arch.gpr[4]); +} + static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, gva_t eaddr) { diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 5e06e3153888..362dffe4db10 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -103,14 +103,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, unlock_rmap(rmap); } -static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva, +static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva, int writing, unsigned long *pte_sizep) { pte_t *ptep; unsigned long ps = *pte_sizep; unsigned int shift; - ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift); + ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift); if (!ptep) return __pte(0); if (shift) @@ -130,10 +130,10 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) hpte[0] = hpte_v; } -long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, - long pte_index, unsigned long pteh, unsigned long ptel) +long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel, + pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret) { - struct kvm *kvm = vcpu->kvm; unsigned long i, pa, gpa, gfn, psize; unsigned long slot_fn, hva; unsigned long *hpte; @@ -147,7 +147,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, unsigned int writing; unsigned long mmu_seq; unsigned long rcbits; - bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING; psize = hpte_page_size(pteh, ptel); if (!psize) @@ -201,7 +200,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, /* Look up the Linux PTE for the backing page */ pte_size = psize; - pte = lookup_linux_pte(vcpu, hva, writing, &pte_size); + pte = lookup_linux_pte(pgdir, hva, writing, &pte_size); if (pte_present(pte)) { if (writing && !pte_write(pte)) /* make the actual HPTE be read-only */ @@ -210,6 +209,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, pa = pte_pfn(pte) << PAGE_SHIFT; } } + if (pte_size < psize) return H_PARAMETER; if (pa && pte_size > psize) @@ -297,7 +297,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, lock_rmap(rmap); /* Check for pending invalidations under the rmap chain lock */ if (kvm->arch.using_mmu_notifiers && - mmu_notifier_retry(vcpu->kvm, mmu_seq)) { + mmu_notifier_retry(kvm, mmu_seq)) { /* inval in progress, write a non-present HPTE */ pteh |= HPTE_V_ABSENT; pteh &= ~HPTE_V_VALID; @@ -318,10 +318,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, hpte[0] = pteh; asm volatile("ptesync" : : : "memory"); - vcpu->arch.gpr[4] = pte_index; + *pte_idx_ret = pte_index; return H_SUCCESS; } -EXPORT_SYMBOL_GPL(kvmppc_h_enter); +EXPORT_SYMBOL_GPL(kvmppc_do_h_enter); + +long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, + long pte_index, unsigned long pteh, unsigned long ptel) +{ + return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel, + vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]); +} #define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token)) -- cgit v1.2.3-59-g8ed1b From 4879f241720cda3e6c18a1713bf9b2ed2de14ee4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 19 Nov 2012 23:01:34 +0000 Subject: KVM: PPC: Book3S HV: Fix bug causing loss of page dirty state This fixes a bug where adding a new guest HPT entry via the H_ENTER hcall would lose the "changed" bit in the reverse map information for the guest physical page being mapped. The result was that the KVM_GET_DIRTY_LOG could return a zero bit for the page even though the page had been modified by the guest. This fixes it by only modifying the index and present bits in the reverse map entry, thus preserving the reference and change bits. We were also unnecessarily setting the reference bit, and this fixes that too. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 362dffe4db10..ff2da5ce475c 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -59,10 +59,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, head->back = pte_index; } else { rev->forw = rev->back = pte_index; - i = pte_index; + *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | + pte_index | KVMPPC_RMAP_PRESENT; } - smp_wmb(); - *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */ + unlock_rmap(rmap); } EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); -- cgit v1.2.3-59-g8ed1b From 44e5f6be62741bd44968f40f3afa1cff1df983f2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 19 Nov 2012 22:52:49 +0000 Subject: KVM: PPC: Book3S HV: Add a mechanism for recording modified HPTEs This uses a bit in our record of the guest view of the HPTE to record when the HPTE gets modified. We use a reserved bit for this, and ensure that this bit is always cleared in HPTE values returned to the guest. The recording of modified HPTEs is only done if other code indicates its interest by setting kvm->arch.hpte_mod_interest to a non-zero value. The reason for this is that when later commits add facilities for userspace to read the HPT, the first pass of reading the HPT will be quicker if there are no (or very few) HPTEs marked as modified, rather than having most HPTEs marked as modified. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_64.h | 9 +++++++++ arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/kvm/book3s_hv_rm_mmu.c | 28 ++++++++++++++++++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 1472a5b4e4e3..b322e5bd6964 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -50,6 +50,15 @@ extern int kvm_hpt_order; /* order of preallocated HPTs */ #define HPTE_V_HVLOCK 0x40UL #define HPTE_V_ABSENT 0x20UL +/* + * We use this bit in the guest_rpte field of the revmap entry + * to indicate a modified HPTE. + */ +#define HPTE_GR_MODIFIED (1ul << 62) + +/* These bits are reserved in the guest view of the HPTE */ +#define HPTE_GR_RESERVED HPTE_GR_MODIFIED + static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) { unsigned long tmp, old; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 3093896015f0..58c72646c445 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -248,6 +248,7 @@ struct kvm_arch { atomic_t vcpus_running; unsigned long hpt_npte; unsigned long hpt_mask; + atomic_t hpte_mod_interest; spinlock_t slot_phys_lock; unsigned short last_vcpu[NR_CPUS]; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index ff2da5ce475c..ed563a5f25c8 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -66,6 +66,17 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, } EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); +/* + * Note modification of an HPTE; set the HPTE modified bit + * if anyone is interested. + */ +static inline void note_hpte_modification(struct kvm *kvm, + struct revmap_entry *rev) +{ + if (atomic_read(&kvm->arch.hpte_mod_interest)) + rev->guest_rpte |= HPTE_GR_MODIFIED; +} + /* Remove this HPTE from the chain for a real page */ static void remove_revmap_chain(struct kvm *kvm, long pte_index, struct revmap_entry *rev, @@ -138,7 +149,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, unsigned long slot_fn, hva; unsigned long *hpte; struct revmap_entry *rev; - unsigned long g_ptel = ptel; + unsigned long g_ptel; struct kvm_memory_slot *memslot; unsigned long *physp, pte_size; unsigned long is_io; @@ -153,6 +164,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, return H_PARAMETER; writing = hpte_is_writable(ptel); pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); + ptel &= ~HPTE_GR_RESERVED; + g_ptel = ptel; /* used later to detect if we might have been invalidated */ mmu_seq = kvm->mmu_notifier_seq; @@ -287,8 +300,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, rev = &kvm->arch.revmap[pte_index]; if (realmode) rev = real_vmalloc_addr(rev); - if (rev) + if (rev) { rev->guest_rpte = g_ptel; + note_hpte_modification(kvm, rev); + } /* Link HPTE into reverse-map chain */ if (pteh & HPTE_V_VALID) { @@ -392,7 +407,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, /* Read PTE low word after tlbie to get final R/C values */ remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); } - r = rev->guest_rpte; + r = rev->guest_rpte & ~HPTE_GR_RESERVED; + note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); vcpu->arch.gpr[4] = v; @@ -466,6 +482,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) args[j] = ((0x80 | flags) << 56) + pte_index; rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); + note_hpte_modification(kvm, rev); if (!(hp[0] & HPTE_V_VALID)) { /* insert R and C bits from PTE */ @@ -555,6 +572,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, if (rev) { r = (rev->guest_rpte & ~mask) | bits; rev->guest_rpte = r; + note_hpte_modification(kvm, rev); } r = (hpte[1] & ~mask) | bits; @@ -606,8 +624,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; } - if (v & HPTE_V_VALID) + if (v & HPTE_V_VALID) { r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C)); + r &= ~HPTE_GR_RESERVED; + } vcpu->arch.gpr[4 + i * 2] = v; vcpu->arch.gpr[5 + i * 2] = r; } -- cgit v1.2.3-59-g8ed1b From 6b445ad4f839b06e68dd8e178e1168482ca20310 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 19 Nov 2012 22:55:44 +0000 Subject: KVM: PPC: Book3S HV: Make a HPTE removal function available This makes a HPTE removal function, kvmppc_do_h_remove(), available outside book3s_hv_rm_mmu.c. This will be used by the HPT writing code. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 3 +++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index fea768f21cd7..46763d10ad52 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -160,6 +160,9 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, pgd_t *pgdir, bool realmode, unsigned long *idx_ret); +extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long *hpret); extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index ed563a5f25c8..fc3da3208fda 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -365,11 +365,10 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } -long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, - unsigned long pte_index, unsigned long avpn, - unsigned long va) +long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, + unsigned long pte_index, unsigned long avpn, + unsigned long *hpret) { - struct kvm *kvm = vcpu->kvm; unsigned long *hpte; unsigned long v, r, rb; struct revmap_entry *rev; @@ -411,10 +410,18 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); - vcpu->arch.gpr[4] = v; - vcpu->arch.gpr[5] = r; + hpret[0] = v; + hpret[1] = r; return H_SUCCESS; } +EXPORT_SYMBOL_GPL(kvmppc_do_h_remove); + +long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, + unsigned long pte_index, unsigned long avpn) +{ + return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn, + &vcpu->arch.gpr[4]); +} long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { -- cgit v1.2.3-59-g8ed1b From a2932923ccf63c419c77aaa18ac09be98f2c94d8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 19 Nov 2012 22:57:20 +0000 Subject: KVM: PPC: Book3S HV: Provide a method for userspace to read and write the HPT A new ioctl, KVM_PPC_GET_HTAB_FD, returns a file descriptor. Reads on this fd return the contents of the HPT (hashed page table), writes create and/or remove entries in the HPT. There is a new capability, KVM_CAP_PPC_HTAB_FD, to indicate the presence of the ioctl. The ioctl takes an argument structure with the index of the first HPT entry to read out and a set of flags. The flags indicate whether the user is intending to read or write the HPT, and whether to return all entries or only the "bolted" entries (those with the bolted bit, 0x10, set in the first doubleword). This is intended for use in implementing qemu's savevm/loadvm and for live migration. Therefore, on reads, the first pass returns information about all HPTEs (or all bolted HPTEs). When the first pass reaches the end of the HPT, it returns from the read. Subsequent reads only return information about HPTEs that have changed since they were last read. A read that finds no changed HPTEs in the HPT following where the last read finished will return 0 bytes. The format of the data provides a simple run-length compression of the invalid entries. Each block of data starts with a header that indicates the index (position in the HPT, which is just an array), the number of valid entries starting at that index (may be zero), and the number of invalid entries following those valid entries. The valid entries, 16 bytes each, follow the header. The invalid entries are not explicitly represented. Signed-off-by: Paul Mackerras [agraf: fix documentation] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 54 +++++ arch/powerpc/include/asm/kvm_book3s_64.h | 22 ++ arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/uapi/asm/kvm.h | 25 +++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 344 +++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 12 -- arch/powerpc/kvm/powerpc.c | 17 ++ include/uapi/linux/kvm.h | 3 + 8 files changed, 467 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6671fdc0afb1..a5607c571cb3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2071,6 +2071,60 @@ KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm Note that the vcpu ioctl is asynchronous to vcpu execution. +4.78 KVM_PPC_GET_HTAB_FD + +Capability: KVM_CAP_PPC_HTAB_FD +Architectures: powerpc +Type: vm ioctl +Parameters: Pointer to struct kvm_get_htab_fd (in) +Returns: file descriptor number (>= 0) on success, -1 on error + +This returns a file descriptor that can be used either to read out the +entries in the guest's hashed page table (HPT), or to write entries to +initialize the HPT. The returned fd can only be written to if the +KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and +can only be read if that bit is clear. The argument struct looks like +this: + +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +The `start_index' field gives the index in the HPT of the entry at +which to start reading. It is ignored when writing. + +Reads on the fd will initially supply information about all +"interesting" HPT entries. Interesting entries are those with the +bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise +all entries. When the end of the HPT is reached, the read() will +return. If read() is called again on the fd, it will start again from +the beginning of the HPT, but will only return HPT entries that have +changed since they were last read. + +Data read or written is structured as a header (8 bytes) followed by a +series of valid HPT entries (16 bytes) each. The header indicates how +many valid HPT entries there are and how many invalid entries follow +the valid entries. The invalid entries are not represented explicitly +in the stream. The header format is: + +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + +Writes to the fd create HPT entries starting at the index given in the +header; first `n_valid' valid entries with contents from the data +written, then `n_invalid' invalid entries, invalidating any previously +valid entries found. + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index b322e5bd6964..38bec1dc9928 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -246,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, return !(memslot->base_gfn & mask) && !(memslot->npages & mask); } +/* + * This works for 4k, 64k and 16M pages on POWER7, + * and 4k and 16M pages on PPC970. + */ +static inline unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + +static inline int is_vrma_hpte(unsigned long hpte_v) +{ + return (hpte_v & ~0xffffffUL) == + (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 609cca3e9426..1ca31e92ee75 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void); extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); +extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index b89ae4db45ce..514883dd311e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params { __u32 reserved[8]; }; +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +/* + * Data read on the file descriptor is formatted as a series of + * records, each consisting of a header followed by a series of + * `n_valid' HPTEs (16 bytes each), which are all valid. Following + * those valid HPTEs there are `n_invalid' invalid HPTEs, which + * are not represented explicitly in the stream. The same format + * is used for writing. + */ +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6ee6516a0bee..0aa40734c8f6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -1145,6 +1147,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) put_page(page); } +/* + * Functions for reading and writing the hash table via reads and + * writes on a file descriptor. + * + * Reads return the guest view of the hash table, which has to be + * pieced together from the real hash table and the guest_rpte + * values in the revmap array. + * + * On writes, each HPTE written is considered in turn, and if it + * is valid, it is written to the HPT as if an H_ENTER with the + * exact flag set was done. When the invalid count is non-zero + * in the header written to the stream, the kernel will make + * sure that that many HPTEs are invalid, and invalidate them + * if not. + */ + +struct kvm_htab_ctx { + unsigned long index; + unsigned long flags; + struct kvm *kvm; + int first_pass; +}; + +#define HPTE_SIZE (2 * sizeof(unsigned long)) + +static long record_hpte(unsigned long flags, unsigned long *hptp, + unsigned long *hpte, struct revmap_entry *revp, + int want_valid, int first_pass) +{ + unsigned long v, r; + int ok = 1; + int valid, dirty; + + /* Unmodified entries are uninteresting except on the first pass */ + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + if (!first_pass && !dirty) + return 0; + + valid = 0; + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + valid = 1; + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && + !(hptp[0] & HPTE_V_BOLTED)) + valid = 0; + } + if (valid != want_valid) + return 0; + + v = r = 0; + if (valid || dirty) { + /* lock the HPTE so it's stable and read it */ + preempt_disable(); + while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) + cpu_relax(); + v = hptp[0]; + if (v & HPTE_V_ABSENT) { + v &= ~HPTE_V_ABSENT; + v |= HPTE_V_VALID; + } + /* re-evaluate valid and dirty from synchronized HPTE value */ + valid = !!(v & HPTE_V_VALID); + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) + valid = 0; + r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + /* only clear modified if this is the right sort of entry */ + if (valid == want_valid && dirty) { + r &= ~HPTE_GR_MODIFIED; + revp->guest_rpte = r; + } + asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); + hptp[0] &= ~HPTE_V_HVLOCK; + preempt_enable(); + if (!(valid == want_valid && (first_pass || dirty))) + ok = 0; + } + hpte[0] = v; + hpte[1] = r; + return ok; +} + +static ssize_t kvm_htab_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long *hptp; + struct revmap_entry *revp; + unsigned long i, nb, nw; + unsigned long __user *lbuf; + struct kvm_get_htab_header __user *hptr; + unsigned long flags; + int first_pass; + unsigned long hpte[2]; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + first_pass = ctx->first_pass; + flags = ctx->flags; + + i = ctx->index; + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + revp = kvm->arch.revmap + i; + lbuf = (unsigned long __user *)buf; + + nb = 0; + while (nb + sizeof(hdr) + HPTE_SIZE < count) { + /* Initialize header */ + hptr = (struct kvm_get_htab_header __user *)buf; + hdr.index = i; + hdr.n_valid = 0; + hdr.n_invalid = 0; + nw = nb; + nb += sizeof(hdr); + lbuf = (unsigned long __user *)(buf + sizeof(hdr)); + + /* Skip uninteresting entries, i.e. clean on not-first pass */ + if (!first_pass) { + while (i < kvm->arch.hpt_npte && + !(revp->guest_rpte & HPTE_GR_MODIFIED)) { + ++i; + hptp += 2; + ++revp; + } + } + + /* Grab a series of valid entries */ + while (i < kvm->arch.hpt_npte && + hdr.n_valid < 0xffff && + nb + HPTE_SIZE < count && + record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { + /* valid entry, write it out */ + ++hdr.n_valid; + if (__put_user(hpte[0], lbuf) || + __put_user(hpte[1], lbuf + 1)) + return -EFAULT; + nb += HPTE_SIZE; + lbuf += 2; + ++i; + hptp += 2; + ++revp; + } + /* Now skip invalid entries while we can */ + while (i < kvm->arch.hpt_npte && + hdr.n_invalid < 0xffff && + record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { + /* found an invalid entry */ + ++hdr.n_invalid; + ++i; + hptp += 2; + ++revp; + } + + if (hdr.n_valid || hdr.n_invalid) { + /* write back the header */ + if (__copy_to_user(hptr, &hdr, sizeof(hdr))) + return -EFAULT; + nw = nb; + buf = (char __user *)lbuf; + } else { + nb = nw; + } + + /* Check if we've wrapped around the hash table */ + if (i >= kvm->arch.hpt_npte) { + i = 0; + ctx->first_pass = 0; + break; + } + } + + ctx->index = i; + + return nb; +} + +static ssize_t kvm_htab_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long i, j; + unsigned long v, r; + unsigned long __user *lbuf; + unsigned long *hptp; + unsigned long tmp[2]; + ssize_t nb; + long int err, ret; + int rma_setup; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + /* lock out vcpus from running while we're doing this */ + mutex_lock(&kvm->lock); + rma_setup = kvm->arch.rma_setup_done; + if (rma_setup) { + kvm->arch.rma_setup_done = 0; /* temporarily */ + /* order rma_setup_done vs. vcpus_running */ + smp_mb(); + if (atomic_read(&kvm->arch.vcpus_running)) { + kvm->arch.rma_setup_done = 1; + mutex_unlock(&kvm->lock); + return -EBUSY; + } + } + + err = 0; + for (nb = 0; nb + sizeof(hdr) <= count; ) { + err = -EFAULT; + if (__copy_from_user(&hdr, buf, sizeof(hdr))) + break; + + err = 0; + if (nb + hdr.n_valid * HPTE_SIZE > count) + break; + + nb += sizeof(hdr); + buf += sizeof(hdr); + + err = -EINVAL; + i = hdr.index; + if (i >= kvm->arch.hpt_npte || + i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) + break; + + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + lbuf = (unsigned long __user *)buf; + for (j = 0; j < hdr.n_valid; ++j) { + err = -EFAULT; + if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + goto out; + err = -EINVAL; + if (!(v & HPTE_V_VALID)) + goto out; + lbuf += 2; + nb += HPTE_SIZE; + + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + err = -EIO; + ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, + tmp); + if (ret != H_SUCCESS) { + pr_err("kvm_htab_write ret %ld i=%ld v=%lx " + "r=%lx\n", ret, i, v, r); + goto out; + } + if (!rma_setup && is_vrma_hpte(v)) { + unsigned long psize = hpte_page_size(v, r); + unsigned long senc = slb_pgsize_encoding(psize); + unsigned long lpcr; + + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); + kvm->arch.lpcr = lpcr; + rma_setup = 1; + } + ++i; + hptp += 2; + } + + for (j = 0; j < hdr.n_invalid; ++j) { + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + ++i; + hptp += 2; + } + err = 0; + } + + out: + /* Order HPTE updates vs. rma_setup_done */ + smp_wmb(); + kvm->arch.rma_setup_done = rma_setup; + mutex_unlock(&kvm->lock); + + if (err) + return err; + return nb; +} + +static int kvm_htab_release(struct inode *inode, struct file *filp) +{ + struct kvm_htab_ctx *ctx = filp->private_data; + + filp->private_data = NULL; + if (!(ctx->flags & KVM_GET_HTAB_WRITE)) + atomic_dec(&ctx->kvm->arch.hpte_mod_interest); + kvm_put_kvm(ctx->kvm); + kfree(ctx); + return 0; +} + +static struct file_operations kvm_htab_fops = { + .read = kvm_htab_read, + .write = kvm_htab_write, + .llseek = default_llseek, + .release = kvm_htab_release, +}; + +int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) +{ + int ret; + struct kvm_htab_ctx *ctx; + int rwflag; + + /* reject flags we don't recognize */ + if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE)) + return -EINVAL; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + kvm_get_kvm(kvm); + ctx->kvm = kvm; + ctx->index = ghf->start_index; + ctx->flags = ghf->flags; + ctx->first_pass = 1; + + rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; + ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); + if (ret < 0) { + kvm_put_kvm(kvm); + return ret; + } + + if (rwflag == O_RDONLY) { + mutex_lock(&kvm->slots_lock); + atomic_inc(&kvm->arch.hpte_mod_interest); + /* make sure kvmppc_do_h_enter etc. see the increment */ + synchronize_srcu_expedited(&kvm->srcu); + mutex_unlock(&kvm->slots_lock); + } + + return ret; +} + void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 843eb754a1d5..a4f59dbcd800 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1563,18 +1563,6 @@ out: return r; } -static unsigned long slb_pgsize_encoding(unsigned long psize) -{ - unsigned long senc = 0; - - if (psize > 0x1000) { - senc = SLB_VSID_L; - if (psize == 0x10000) - senc |= SLB_VSID_LP_01; - } - return senc; -} - static void unpin_slot(struct kvm_memory_slot *memslot) { unsigned long *physp; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d583ea15e151..70739a089560 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -354,6 +354,12 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; #else r = 0; + break; +#endif +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_PPC_HTAB_FD: + r = 1; + break; #endif break; case KVM_CAP_NR_VCPUS: @@ -954,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + + case KVM_PPC_GET_HTAB_FD: { + struct kvm *kvm = filp->private_data; + struct kvm_get_htab_fd ghf; + + r = -EFAULT; + if (copy_from_user(&ghf, argp, sizeof(ghf))) + break; + r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); + break; + } #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 494a84c37c3e..e6e5d4b13708 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_smmu_info { #endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 +#define KVM_CAP_PPC_HTAB_FD 84 #ifdef KVM_CAP_IRQ_ROUTING @@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_PPC_HTAB_FD */ +#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* * ioctls for vcpu fds -- cgit v1.2.3-59-g8ed1b From a64fd707481631b9682f9baeefac489bc55bbf73 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 21 Nov 2012 23:27:19 +0000 Subject: KVM: PPC: Book3S HV: Reset reverse-map chains when resetting the HPT With HV-style KVM, we maintain reverse-mapping lists that enable us to find all the HPT (hashed page table) entries that reference each guest physical page, with the heads of the lists in the memslot->arch.rmap arrays. When we reset the HPT (i.e. when we reboot the VM), we clear out all the HPT entries but we were not clearing out the reverse mapping lists. The result is that as we create new HPT entries, the lists get corrupted, which can easily lead to loops, resulting in the host kernel hanging when it tries to traverse those lists. This fixes the problem by zeroing out all the reverse mapping lists when we zero out the HPT. This incidentally means that we are also zeroing our record of the referenced and changed bits (not the bits in the Linux PTEs, used by the Linux MM subsystem, but the bits used by the KVM_GET_DIRTY_LOG ioctl, and those used by kvm_age_hva() and kvm_test_age_hva()). Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 0aa40734c8f6..1029e2201bf6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -46,6 +46,7 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, long pte_index, unsigned long pteh, unsigned long ptel, unsigned long *pte_idx_ret); +static void kvmppc_rmap_reset(struct kvm *kvm); long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) { @@ -143,6 +144,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) order = kvm->arch.hpt_order; /* Set the entire HPT to 0, i.e. invalid HPTEs */ memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); + /* + * Reset all the reverse-mapping chains for all memslots + */ + kvmppc_rmap_reset(kvm); /* * Set the whole last_vcpu array to an invalid vcpu number. * This ensures that each vcpu will flush its TLB on next entry. @@ -772,6 +777,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, goto out_put; } +static void kvmppc_rmap_reset(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int srcu_idx; + + srcu_idx = srcu_read_lock(&kvm->srcu); + slots = kvm->memslots; + kvm_for_each_memslot(memslot, slots) { + /* + * This assumes it is acceptable to lose reference and + * change bits across a reset. + */ + memset(memslot->arch.rmap, 0, + memslot->npages * sizeof(*memslot->arch.rmap)); + } + srcu_read_unlock(&kvm->srcu, srcu_idx); +} + static int kvm_handle_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, -- cgit v1.2.3-59-g8ed1b From 05dd85f7933ffbe6d71415e631c95ca615ae1e81 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 21 Nov 2012 23:29:12 +0000 Subject: KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT This fixes a bug in the code which allows userspace to read out the contents of the guest's hashed page table (HPT). On the second and subsequent passes through the HPT, when we are reporting only those entries that have changed, we were incorrectly initializing the index field of the header with the index of the first entry we skipped rather than the first changed entry. This fixes it. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 1029e2201bf6..ac6b5acb99b9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1282,7 +1282,6 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, while (nb + sizeof(hdr) + HPTE_SIZE < count) { /* Initialize header */ hptr = (struct kvm_get_htab_header __user *)buf; - hdr.index = i; hdr.n_valid = 0; hdr.n_invalid = 0; nw = nb; @@ -1298,6 +1297,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, ++revp; } } + hdr.index = i; /* Grab a series of valid entries */ while (i < kvm->arch.hpt_npte && -- cgit v1.2.3-59-g8ed1b From 1cc8ed0b13ae6e076a1dd1f18da508b48c7aa05a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 21 Nov 2012 23:28:41 +0000 Subject: KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages Currently, if the guest does an H_PROTECT hcall requesting that the permissions on a HPT entry be changed to allow writing, we make the requested change even if the page is marked read-only in the host Linux page tables. This is a problem since it would for instance allow a guest to modify a page that KSM has decided can be shared between multiple guests. To fix this, if the new permissions for the page allow writing, we need to look up the memslot for the page, work out the host virtual address, and look up the Linux page tables to get the PTE for the page. If that PTE is read-only, we reduce the HPTE permissions to read-only. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index fc3da3208fda..7a57ea49172d 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -600,6 +600,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, asm volatile("tlbiel %0" : : "r" (rb)); asm volatile("ptesync" : : : "memory"); } + /* + * If the host has this page as readonly but the guest + * wants to make it read/write, reduce the permissions. + * Checking the host permissions involves finding the + * memslot and then the Linux PTE for the page. + */ + if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) { + unsigned long psize, gfn, hva; + struct kvm_memory_slot *memslot; + pgd_t *pgdir = vcpu->arch.pgdir; + pte_t pte; + + psize = hpte_page_size(v, r); + gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); + if (memslot) { + hva = __gfn_to_hva_memslot(memslot, gfn); + pte = lookup_linux_pte(pgdir, hva, 1, &psize); + if (pte_present(pte) && !pte_write(pte)) + r = hpte_make_readonly(r); + } + } } hpte[1] = r; eieio(); -- cgit v1.2.3-59-g8ed1b From b0a94d4e23201c7559bb8f8657cfb629561288f2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 4 Nov 2012 18:15:43 +0000 Subject: KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers This adds basic emulation of the PURR and SPURR registers. We assume we are emulating a single-threaded core, so these advance at the same rate as the timebase. A Linux kernel running on a POWER7 expects to be able to access these registers and is not prepared to handle a program interrupt on accessing them. This also adds a very minimal emulation of the DSCR (data stream control register). Writes are ignored and reads return zero. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 2 ++ arch/powerpc/kvm/book3s_emulate.c | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 46763d10ad52..5a56e1c5f851 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -81,6 +81,8 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; + u64 purr_offset; + u64 spurr_offset; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index b9a989dc76cc..d31a716f7f2b 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -22,6 +22,7 @@ #include #include #include +#include #define OP_19_XOP_RFID 18 #define OP_19_XOP_RFI 50 @@ -395,6 +396,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; + case SPRN_PURR: + to_book3s(vcpu)->purr_offset = spr_val - get_tb(); + break; + case SPRN_SPURR: + to_book3s(vcpu)->spurr_offset = spr_val - get_tb(); + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: @@ -412,6 +419,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_CTRLF: case SPRN_CTRLT: case SPRN_L2CR: + case SPRN_DSCR: case SPRN_MMCR0_GEKKO: case SPRN_MMCR1_GEKKO: case SPRN_PMC1_GEKKO: @@ -483,9 +491,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) *spr_val = to_book3s(vcpu)->hid[5]; break; case SPRN_CFAR: - case SPRN_PURR: + case SPRN_DSCR: *spr_val = 0; break; + case SPRN_PURR: + *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + break; + case SPRN_SPURR: + *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: -- cgit v1.2.3-59-g8ed1b From 28c483b62fcd2589dadfc1250970f85aa0ab3df6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 4 Nov 2012 18:16:46 +0000 Subject: KVM: PPC: Book3S PR: Fix VSX handling This fixes various issues in how we were handling the VSX registers that exist on POWER7 machines. First, we were running off the end of the current->thread.fpr[] array. Ultimately this was because the vcpu->arch.vsr[] array is sized to be able to store both the FP registers and the extra VSX registers (i.e. 64 entries), but PR KVM only uses it for the extra VSX registers (i.e. 32 entries). Secondly, calling load_up_vsx() from C code is a really bad idea, because it jumps to fast_exception_return at the end, rather than returning with a blr instruction. This was causing it to jump off to a random location with random register contents, since it was using the largely uninitialized stack frame created by kvmppc_load_up_vsx. In fact, it isn't necessary to call either __giveup_vsx or load_up_vsx, since giveup_fpu and load_up_fpu handle the extra VSX registers as well as the standard FP registers on machines with VSX. Also, since VSX instructions can access the VMX registers and the FP registers as well as the extra VSX registers, we have to load up the FP and VMX registers before we can turn on the MSR_VSX bit for the guest. Conversely, if we save away any of the VSX or FP registers, we have to turn off MSR_VSX for the guest. To handle all this, it is more convenient for a single call to kvmppc_giveup_ext() to handle all the state saving that needs to be done, so we make it take a set of MSR bits rather than just one, and the switch statement becomes a series of if statements. Similarly kvmppc_handle_ext needs to be able to load up more than one set of registers. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/kvm/book3s_exports.c | 3 - arch/powerpc/kvm/book3s_pr.c | 112 +++++++++++++++++++---------------- arch/powerpc/kvm/book3s_rmhandlers.S | 3 - 4 files changed, 62 insertions(+), 57 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index d24c14163966..97d37278ea2d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -518,6 +518,7 @@ #define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ #define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ +#define SRR1_PROGILL 0x00080000 /* Illegal instruction */ #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ #define SRR1_PROGTRAP 0x00020000 /* Trap */ #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c index a150817d6d4c..7057a02f0906 100644 --- a/arch/powerpc/kvm/book3s_exports.c +++ b/arch/powerpc/kvm/book3s_exports.c @@ -28,8 +28,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec); #endif -#ifdef CONFIG_VSX -EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx); -#endif #endif diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b853696b6d8e..5c496ecf5718 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -81,9 +81,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) svcpu_put(svcpu); #endif - kvmppc_giveup_ext(vcpu, MSR_FP); - kvmppc_giveup_ext(vcpu, MSR_VEC); - kvmppc_giveup_ext(vcpu, MSR_VSX); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); vcpu->cpu = -1; } @@ -433,10 +431,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, static inline int get_fpr_index(int i) { -#ifdef CONFIG_VSX - i *= 2; -#endif - return i; + return i * TS_FPRWIDTH; } /* Give up external provider (FPU, Altivec, VSX) */ @@ -450,41 +445,49 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) u64 *thread_fpr = (u64*)t->fpr; int i; - if (!(vcpu->arch.guest_owned_ext & msr)) + /* + * VSX instructions can access FP and vector registers, so if + * we are giving up VSX, make sure we give up FP and VMX as well. + */ + if (msr & MSR_VSX) + msr |= MSR_FP | MSR_VEC; + + msr &= vcpu->arch.guest_owned_ext; + if (!msr) return; #ifdef DEBUG_EXT printk(KERN_INFO "Giving up ext 0x%lx\n", msr); #endif - switch (msr) { - case MSR_FP: + if (msr & MSR_FP) { + /* + * Note that on CPUs with VSX, giveup_fpu stores + * both the traditional FP registers and the added VSX + * registers into thread.fpr[]. + */ giveup_fpu(current); for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) vcpu_fpr[i] = thread_fpr[get_fpr_index(i)]; vcpu->arch.fpscr = t->fpscr.val; - break; - case MSR_VEC: + +#ifdef CONFIG_VSX + if (cpu_has_feature(CPU_FTR_VSX)) + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) + vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; +#endif + } + #ifdef CONFIG_ALTIVEC + if (msr & MSR_VEC) { giveup_altivec(current); memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr)); vcpu->arch.vscr = t->vscr; -#endif - break; - case MSR_VSX: -#ifdef CONFIG_VSX - __giveup_vsx(current); - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) - vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1]; -#endif - break; - default: - BUG(); } +#endif - vcpu->arch.guest_owned_ext &= ~msr; - current->thread.regs->msr &= ~msr; + vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX); kvmppc_recalc_shadow_msr(vcpu); } @@ -544,47 +547,56 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, return RESUME_GUEST; } - /* We already own the ext */ - if (vcpu->arch.guest_owned_ext & msr) { - return RESUME_GUEST; + if (msr == MSR_VSX) { + /* No VSX? Give an illegal instruction interrupt */ +#ifdef CONFIG_VSX + if (!cpu_has_feature(CPU_FTR_VSX)) +#endif + { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + return RESUME_GUEST; + } + + /* + * We have to load up all the FP and VMX registers before + * we can let the guest use VSX instructions. + */ + msr = MSR_FP | MSR_VEC | MSR_VSX; } + /* See if we already own all the ext(s) needed */ + msr &= ~vcpu->arch.guest_owned_ext; + if (!msr) + return RESUME_GUEST; + #ifdef DEBUG_EXT printk(KERN_INFO "Loading up ext 0x%lx\n", msr); #endif current->thread.regs->msr |= msr; - switch (msr) { - case MSR_FP: + if (msr & MSR_FP) { for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) thread_fpr[get_fpr_index(i)] = vcpu_fpr[i]; - +#ifdef CONFIG_VSX + for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++) + thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; +#endif t->fpscr.val = vcpu->arch.fpscr; t->fpexc_mode = 0; kvmppc_load_up_fpu(); - break; - case MSR_VEC: + } + + if (msr & MSR_VEC) { #ifdef CONFIG_ALTIVEC memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr)); t->vscr = vcpu->arch.vscr; t->vrsave = -1; kvmppc_load_up_altivec(); #endif - break; - case MSR_VSX: -#ifdef CONFIG_VSX - for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++) - thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i]; - kvmppc_load_up_vsx(); -#endif - break; - default: - BUG(); } vcpu->arch.guest_owned_ext |= msr; - kvmppc_recalc_shadow_msr(vcpu); return RESUME_GUEST; @@ -1134,7 +1146,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* Save VSX state in stack */ used_vsr = current->thread.used_vsr; if (used_vsr && (current->thread.regs->msr & MSR_VSX)) - __giveup_vsx(current); + __giveup_vsx(current); #endif /* Remember the MSR with disabled extensions */ @@ -1151,14 +1163,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* No need for kvm_guest_exit. It's done in handle_exit. We also get here with interrupts enabled. */ - current->thread.regs->msr = ext_msr; - /* Make sure we save the guest FPU/Altivec/VSX state */ - kvmppc_giveup_ext(vcpu, MSR_FP); - kvmppc_giveup_ext(vcpu, MSR_VEC); - kvmppc_giveup_ext(vcpu, MSR_VSX); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); + + current->thread.regs->msr = ext_msr; - /* Restore FPU state from stack */ + /* Restore FPU/VSX state from stack */ memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr)); current->thread.fpscr.val = fpscr; current->thread.fpexc_mode = fpexc_mode; diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index b2f8258b545a..8f7633e3afb8 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -234,8 +234,5 @@ define_load_up(fpu) #ifdef CONFIG_ALTIVEC define_load_up(altivec) #endif -#ifdef CONFIG_VSX -define_load_up(vsx) -#endif #include "book3s_segment.S" -- cgit v1.2.3-59-g8ed1b From 3a2e7b0d761ae3faecdb43482d178b5fe2e3b8a5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sun, 4 Nov 2012 18:17:28 +0000 Subject: KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S The mask of MSR bits that get transferred from the guest MSR to the shadow MSR included MSR_DE. In fact that bit only exists on Book 3E processors, and it is assigned the same bit used for MSR_BE on Book 3S processors. Since we already had MSR_BE in the mask, this just removes MSR_DE. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_pr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 5c496ecf5718..28d38adeca73 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -145,7 +145,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) ulong smsr = vcpu->arch.shared->msr; /* Guest MSR values */ - smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE; /* Process MSR values */ smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; /* External providers the guest reserved */ -- cgit v1.2.3-59-g8ed1b From 1b400ba0cd24a5994d792c7cfa0ee24cac266d3c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 21 Nov 2012 23:28:08 +0000 Subject: KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations When we change or remove a HPT (hashed page table) entry, we can do either a global TLB invalidation (tlbie) that works across the whole machine, or a local invalidation (tlbiel) that only affects this core. Currently we do local invalidations if the VM has only one vcpu or if the guest requests it with the H_LOCAL flag, though the guest Linux kernel currently doesn't ever use H_LOCAL. Then, to cope with the possibility that vcpus moving around to different physical cores might expose stale TLB entries, there is some code in kvmppc_hv_entry to flush the whole TLB of entries for this VM if either this vcpu is now running on a different physical core from where it last ran, or if this physical core last ran a different vcpu. There are a number of problems on POWER7 with this as it stands: - The TLB invalidation is done per thread, whereas it only needs to be done per core, since the TLB is shared between the threads. - With the possibility of the host paging out guest pages, the use of H_LOCAL by an SMP guest is dangerous since the guest could possibly retain and use a stale TLB entry pointing to a page that had been removed from the guest. - The TLB invalidations that we do when a vcpu moves from one physical core to another are unnecessary in the case of an SMP guest that isn't using H_LOCAL. - The optimization of using local invalidations rather than global should apply to guests with one virtual core, not just one vcpu. (None of this applies on PPC970, since there we always have to invalidate the whole TLB when entering and leaving the guest, and we can't support paging out guest memory.) To fix these problems and simplify the code, we now maintain a simple cpumask of which cpus need to flush the TLB on entry to the guest. (This is indexed by cpu, though we only ever use the bits for thread 0 of each core.) Whenever we do a local TLB invalidation, we set the bits for every cpu except the bit for thread 0 of the core that we're currently running on. Whenever we enter a guest, we test and clear the bit for our core, and flush the TLB if it was set. On initial startup of the VM, and when resetting the HPT, we set all the bits in the need_tlb_flush cpumask, since any core could potentially have stale TLB entries from the previous VM to use the same LPID, or the previous contents of the HPT. Then, we maintain a count of the number of online virtual cores, and use that when deciding whether to use a local invalidation rather than the number of online vcpus. The code to make that decision is extracted out into a new function, global_invalidates(). For multi-core guests on POWER7 (i.e. when we are using mmu notifiers), we now never do local invalidations regardless of the H_LOCAL flag. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 5 +-- arch/powerpc/kernel/asm-offsets.c | 4 +-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 ++--- arch/powerpc/kvm/book3s_hv.c | 9 +++++- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 37 +++++++++++++++++++--- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 56 +++++++++++++++------------------ 6 files changed, 73 insertions(+), 45 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 58c72646c445..62fbd38b15fa 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -246,11 +246,12 @@ struct kvm_arch { int using_mmu_notifiers; u32 hpt_order; atomic_t vcpus_running; + u32 online_vcores; unsigned long hpt_npte; unsigned long hpt_mask; atomic_t hpte_mod_interest; spinlock_t slot_phys_lock; - unsigned short last_vcpu[NR_CPUS]; + cpumask_t need_tlb_flush; struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; struct kvmppc_linear_info *hpt_li; #endif /* CONFIG_KVM_BOOK3S_64_HV */ @@ -275,6 +276,7 @@ struct kvmppc_vcore { int nap_count; int napping_threads; u16 pcpu; + u16 last_cpu; u8 vcore_state; u8 in_guest; struct list_head runnable_threads; @@ -523,7 +525,6 @@ struct kvm_vcpu_arch { u64 dec_jiffies; u64 dec_expires; unsigned long pending_exceptions; - u16 last_cpu; u8 ceded; u8 prodded; u32 last_inst; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7523539cfe9f..4e23ba2f3ca7 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -441,8 +441,7 @@ int main(void) DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr)); DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); - DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter)); - DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu)); + DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); @@ -470,7 +469,6 @@ int main(void) DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); - DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu)); DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index ac6b5acb99b9..8cc18abd6dde 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -148,11 +148,8 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) * Reset all the reverse-mapping chains for all memslots */ kvmppc_rmap_reset(kvm); - /* - * Set the whole last_vcpu array to an invalid vcpu number. - * This ensures that each vcpu will flush its TLB on next entry. - */ - memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu)); + /* Ensure that each vcpu will flush its TLB on next entry. */ + cpumask_setall(&kvm->arch.need_tlb_flush); *htab_orderp = order; err = 0; } else { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a4f59dbcd800..ddbec60cb0d2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -853,7 +853,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) goto free_vcpu; vcpu->arch.shared = &vcpu->arch.shregs; - vcpu->arch.last_cpu = -1; vcpu->arch.mmcr[0] = MMCR0_FC; vcpu->arch.ctrl = CTRL_RUNLATCH; /* default to host PVR, since we can't spoof it */ @@ -880,6 +879,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) vcore->preempt_tb = TB_NIL; } kvm->arch.vcores[core] = vcore; + kvm->arch.online_vcores++; } mutex_unlock(&kvm->lock); @@ -1802,6 +1802,13 @@ int kvmppc_core_init_vm(struct kvm *kvm) return -ENOMEM; kvm->arch.lpid = lpid; + /* + * Since we don't flush the TLB when tearing down a VM, + * and this lpid might have previously been used, + * make sure we flush on each core before running the new VM. + */ + cpumask_setall(&kvm->arch.need_tlb_flush); + INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); kvm->arch.rma = NULL; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 7a57ea49172d..19c93bae1aea 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x) return __va(addr); } +/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */ +static int global_invalidates(struct kvm *kvm, unsigned long flags) +{ + int global; + + /* + * If there is only one vcore, and it's currently running, + * we can use tlbiel as long as we mark all other physical + * cores as potentially having stale TLB entries for this lpid. + * If we're not using MMU notifiers, we never take pages away + * from the guest, so we can use tlbiel if requested. + * Otherwise, don't use tlbiel. + */ + if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore) + global = 0; + else if (kvm->arch.using_mmu_notifiers) + global = 1; + else + global = !(flags & H_LOCAL); + + if (!global) { + /* any other core might now have stale TLB entries... */ + smp_wmb(); + cpumask_setall(&kvm->arch.need_tlb_flush); + cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu, + &kvm->arch.need_tlb_flush); + } + + return global; +} + /* * Add this HPTE into the chain for the real page. * Must be called with the chain locked; it unlocks the chain. @@ -390,7 +421,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, if (v & HPTE_V_VALID) { hpte[0] &= ~HPTE_V_VALID; rb = compute_tlbie_rb(v, hpte[1], pte_index); - if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) { + if (global_invalidates(kvm, flags)) { while (!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); asm volatile("ptesync" : : : "memory"); @@ -565,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, return H_NOT_FOUND; } - if (atomic_read(&kvm->online_vcpus) == 1) - flags |= H_LOCAL; v = hpte[0]; bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; @@ -587,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); hpte[0] = v & ~HPTE_V_VALID; - if (!(flags & H_LOCAL)) { + if (global_invalidates(kvm, flags)) { while(!try_lock_tlbie(&kvm->arch.tlbie_lock)) cpu_relax(); asm volatile("ptesync" : : : "memory"); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 690d1120402d..b48bd53dd771 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -313,7 +313,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mtspr SPRN_SDR1,r6 /* switch to partition page table */ mtspr SPRN_LPID,r7 isync + + /* See if we need to flush the TLB */ + lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */ + clrldi r7,r6,64-6 /* extract bit number (6 bits) */ + srdi r6,r6,6 /* doubleword number */ + sldi r6,r6,3 /* address offset */ + add r6,r6,r9 + addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */ li r0,1 + sld r0,r0,r7 + ld r7,0(r6) + and. r7,r7,r0 + beq 22f +23: ldarx r7,0,r6 /* if set, clear the bit */ + andc r7,r7,r0 + stdcx. r7,0,r6 + bne 23b + li r6,128 /* and flush the TLB */ + mtctr r6 + li r7,0x800 /* IS field = 0b10 */ + ptesync +28: tlbiel r7 + addi r7,r7,0x1000 + bdnz 28b + ptesync + +22: li r0,1 stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */ b 10f @@ -336,36 +362,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) mr r9,r4 blt hdec_soon - /* - * Invalidate the TLB if we could possibly have stale TLB - * entries for this partition on this core due to the use - * of tlbiel. - * XXX maybe only need this on primary thread? - */ - ld r9,VCPU_KVM(r4) /* pointer to struct kvm */ - lwz r5,VCPU_VCPUID(r4) - lhz r6,PACAPACAINDEX(r13) - rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */ - lhz r8,VCPU_LAST_CPU(r4) - sldi r7,r6,1 /* see if this is the same vcpu */ - add r7,r7,r9 /* as last ran on this pcpu */ - lhz r0,KVM_LAST_VCPU(r7) - cmpw r6,r8 /* on the same cpu core as last time? */ - bne 3f - cmpw r0,r5 /* same vcpu as this core last ran? */ - beq 1f -3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */ - sth r5,KVM_LAST_VCPU(r7) - li r6,128 - mtctr r6 - li r7,0x800 /* IS field = 0b10 */ - ptesync -2: tlbiel r7 - addi r7,r7,0x1000 - bdnz 2b - ptesync -1: - /* Save purr/spurr */ mfspr r5,SPRN_PURR mfspr r6,SPRN_SPURR -- cgit v1.2.3-59-g8ed1b From b4072df4076c4f33ac9f518052c318c979bca533 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 23 Nov 2012 22:37:50 +0000 Subject: KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7 without panicking Currently, if a machine check interrupt happens while we are in the guest, we exit the guest and call the host's machine check handler, which tends to cause the host to panic. Some machine checks can be triggered by the guest; for example, if the guest creates two entries in the SLB that map the same effective address, and then accesses that effective address, the CPU will take a machine check interrupt. To handle this better, when a machine check happens inside the guest, we call a new function, kvmppc_realmode_machine_check(), while still in real mode before exiting the guest. On POWER7, it handles the cases that the guest can trigger, either by flushing and reloading the SLB, or by flushing the TLB, and then it delivers the machine check interrupt directly to the guest without going back to the host. On POWER7, the OPAL firmware patches the machine check interrupt vector so that it gets control first, and it leaves behind its analysis of the situation in a structure pointed to by the opal_mc_evt field of the paca. The kvmppc_realmode_machine_check() function looks at this, and if OPAL reports that there was no error, or that it has handled the error, we also go straight back to the guest with a machine check. We have to deliver a machine check to the guest since the machine check interrupt might have trashed valid values in SRR0/1. If the machine check is one we can't handle in real mode, and one that OPAL hasn't already handled, or on PPC970, we exit the guest and call the host's machine check handler. We do this by jumping to the machine_check_fwnmi label, rather than absolute address 0x200, because we don't want to re-execute OPAL's handler on POWER7. On PPC970, the two are equivalent because address 0x200 just contains a branch. Then, if the host machine check handler decides that the system can continue executing, kvmppc_handle_exit() delivers a machine check interrupt to the guest -- once again to let the guest know that SRR0/1 have been modified. Signed-off-by: Paul Mackerras [agraf: fix checkpatch warnings] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/mmu-hash64.h | 10 +++ arch/powerpc/kvm/Makefile | 1 + arch/powerpc/kvm/book3s_hv.c | 11 +++ arch/powerpc/kvm/book3s_hv_ras.c | 144 ++++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 75 ++++++++++------- 5 files changed, 213 insertions(+), 28 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_hv_ras.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 9673f73eb8db..2fdb47a19efd 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -121,6 +121,16 @@ extern char initial_stab[]; #define PP_RXRX 3 /* Supervisor read, User read */ #define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */ +/* Fields for tlbiel instruction in architecture 2.06 */ +#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */ +#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */ +#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */ +#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */ +#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */ +#define TLBIEL_INVAL_SET_SHIFT 12 + +#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */ + #ifndef __ASSEMBLY__ struct hash_pte { diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index cd8965828676..1e473d46322c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -73,6 +73,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \ book3s_hv_rmhandlers.o \ book3s_hv_rm_mmu.o \ book3s_64_vio_hv.o \ + book3s_hv_ras.o \ book3s_hv_builtin.o kvm-book3s_64-module-objs := \ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ddbec60cb0d2..71d0c90b62bf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -545,6 +545,17 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_PERFMON: r = RESUME_GUEST; break; + case BOOK3S_INTERRUPT_MACHINE_CHECK: + /* + * Deliver a machine check interrupt to the guest. + * We have to do this, even if the host has handled the + * machine check, because machine checks use SRR0/1 and + * the interrupt might have trashed guest state in them. + */ + kvmppc_book3s_queue_irqprio(vcpu, + BOOK3S_INTERRUPT_MACHINE_CHECK); + r = RESUME_GUEST; + break; case BOOK3S_INTERRUPT_PROGRAM: { ulong flags; diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c new file mode 100644 index 000000000000..35f3cf0269b3 --- /dev/null +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -0,0 +1,144 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright 2012 Paul Mackerras, IBM Corp. + */ + +#include +#include +#include +#include +#include +#include + +/* SRR1 bits for machine check on POWER7 */ +#define SRR1_MC_LDSTERR (1ul << (63-42)) +#define SRR1_MC_IFETCH_SH (63-45) +#define SRR1_MC_IFETCH_MASK 0x7 +#define SRR1_MC_IFETCH_SLBPAR 2 /* SLB parity error */ +#define SRR1_MC_IFETCH_SLBMULTI 3 /* SLB multi-hit */ +#define SRR1_MC_IFETCH_SLBPARMULTI 4 /* SLB parity + multi-hit */ +#define SRR1_MC_IFETCH_TLBMULTI 5 /* I-TLB multi-hit */ + +/* DSISR bits for machine check on POWER7 */ +#define DSISR_MC_DERAT_MULTI 0x800 /* D-ERAT multi-hit */ +#define DSISR_MC_TLB_MULTI 0x400 /* D-TLB multi-hit */ +#define DSISR_MC_SLB_PARITY 0x100 /* SLB parity error */ +#define DSISR_MC_SLB_MULTI 0x080 /* SLB multi-hit */ +#define DSISR_MC_SLB_PARMULTI 0x040 /* SLB parity + multi-hit */ + +/* POWER7 SLB flush and reload */ +static void reload_slb(struct kvm_vcpu *vcpu) +{ + struct slb_shadow *slb; + unsigned long i, n; + + /* First clear out SLB */ + asm volatile("slbmte %0,%0; slbia" : : "r" (0)); + + /* Do they have an SLB shadow buffer registered? */ + slb = vcpu->arch.slb_shadow.pinned_addr; + if (!slb) + return; + + /* Sanity check */ + n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end) + return; + + /* Load up the SLB from that */ + for (i = 0; i < n; ++i) { + unsigned long rb = slb->save_area[i].esid; + unsigned long rs = slb->save_area[i].vsid; + + rb = (rb & ~0xFFFul) | i; /* insert entry number */ + asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); + } +} + +/* POWER7 TLB flush */ +static void flush_tlb_power7(struct kvm_vcpu *vcpu) +{ + unsigned long i, rb; + + rb = TLBIEL_INVAL_SET_LPID; + for (i = 0; i < POWER7_TLB_SETS; ++i) { + asm volatile("tlbiel %0" : : "r" (rb)); + rb += 1 << TLBIEL_INVAL_SET_SHIFT; + } +} + +/* + * On POWER7, see if we can handle a machine check that occurred inside + * the guest in real mode, without switching to the host partition. + * + * Returns: 0 => exit guest, 1 => deliver machine check to guest + */ +static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) +{ + unsigned long srr1 = vcpu->arch.shregs.msr; + struct opal_machine_check_event *opal_evt; + long handled = 1; + + if (srr1 & SRR1_MC_LDSTERR) { + /* error on load/store */ + unsigned long dsisr = vcpu->arch.shregs.dsisr; + + if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI | + DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) { + /* flush and reload SLB; flushes D-ERAT too */ + reload_slb(vcpu); + dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI | + DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI); + } + if (dsisr & DSISR_MC_TLB_MULTI) { + flush_tlb_power7(vcpu); + dsisr &= ~DSISR_MC_TLB_MULTI; + } + /* Any other errors we don't understand? */ + if (dsisr & 0xffffffffUL) + handled = 0; + } + + switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) { + case 0: + break; + case SRR1_MC_IFETCH_SLBPAR: + case SRR1_MC_IFETCH_SLBMULTI: + case SRR1_MC_IFETCH_SLBPARMULTI: + reload_slb(vcpu); + break; + case SRR1_MC_IFETCH_TLBMULTI: + flush_tlb_power7(vcpu); + break; + default: + handled = 0; + } + + /* + * See if OPAL has already handled the condition. + * We assume that if the condition is recovered then OPAL + * will have generated an error log event that we will pick + * up and log later. + */ + opal_evt = local_paca->opal_mc_evt; + if (opal_evt->version == OpalMCE_V1 && + (opal_evt->severity == OpalMCE_SEV_NO_ERROR || + opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED)) + handled = 1; + + if (handled) + opal_evt->in_use = 0; + + return handled; +} + +long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu) +{ + if (cpu_has_feature(CPU_FTR_ARCH_206)) + return kvmppc_realmode_mc_power7(vcpu); + + return 0; +} diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b48bd53dd771..10b6c358dd77 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -27,6 +27,7 @@ #include #include #include +#include /***************************************************************************** * * @@ -678,8 +679,7 @@ BEGIN_FTR_SECTION 1: END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) -nohpte_cont: -hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ +guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ /* Save DEC */ mfspr r5,SPRN_DEC mftb r6 @@ -700,6 +700,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) std r6, VCPU_FAULT_DAR(r9) stw r7, VCPU_FAULT_DSISR(r9) + /* See if it is a machine check */ + cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK + beq machine_check_realmode +mc_cont: + /* Save guest CTRL register, set runlatch to 1 */ 6: mfspr r6,SPRN_CTRLF stw r6,VCPU_CTRL(r9) @@ -1112,38 +1117,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) /* * For external and machine check interrupts, we need * to call the Linux handler to process the interrupt. - * We do that by jumping to the interrupt vector address - * which we have in r12. The [h]rfid at the end of the + * We do that by jumping to absolute address 0x500 for + * external interrupts, or the machine_check_fwnmi label + * for machine checks (since firmware might have patched + * the vector area at 0x200). The [h]rfid at the end of the * handler will return to the book3s_hv_interrupts.S code. * For other interrupts we do the rfid to get back - * to the book3s_interrupts.S code here. + * to the book3s_hv_interrupts.S code here. */ ld r8, HSTATE_VMHANDLER(r13) ld r7, HSTATE_HOST_MSR(r13) + cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL +BEGIN_FTR_SECTION beq 11f - cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* RFI into the highmem handler, or branch to interrupt handler */ -12: mfmsr r6 - mtctr r12 + mfmsr r6 li r0, MSR_RI andc r6, r6, r0 mtmsrd r6, 1 /* Clear RI in MSR */ mtsrr0 r8 mtsrr1 r7 - beqctr + beqa 0x500 /* external interrupt (PPC970) */ + beq cr1, 13f /* machine check */ RFI -11: -BEGIN_FTR_SECTION - b 12b -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201) - mtspr SPRN_HSRR0, r8 + /* On POWER7, we have external interrupts set to use HSRR0/1 */ +11: mtspr SPRN_HSRR0, r8 mtspr SPRN_HSRR1, r7 ba 0x500 +13: b machine_check_fwnmi + /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing @@ -1176,7 +1184,7 @@ kvmppc_hdsi: cmpdi r3, 0 /* retry the instruction */ beq 6f cmpdi r3, -1 /* handle in kernel mode */ - beq nohpte_cont + beq guest_exit_cont cmpdi r3, -2 /* MMIO emulation; need instr word */ beq 2f @@ -1190,6 +1198,7 @@ kvmppc_hdsi: li r10, BOOK3S_INTERRUPT_DATA_STORAGE li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ rotldi r11, r11, 63 +fast_interrupt_c_return: 6: ld r7, VCPU_CTR(r9) lwz r8, VCPU_XER(r9) mtctr r7 @@ -1222,7 +1231,7 @@ kvmppc_hdsi: /* Unset guest mode. */ li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) - b nohpte_cont + b guest_exit_cont /* * Similarly for an HISI, reflect it to the guest as an ISI unless @@ -1248,9 +1257,9 @@ kvmppc_hisi: ld r11, VCPU_MSR(r9) li r12, BOOK3S_INTERRUPT_H_INST_STORAGE cmpdi r3, 0 /* retry the instruction */ - beq 6f + beq fast_interrupt_c_return cmpdi r3, -1 /* handle in kernel mode */ - beq nohpte_cont + beq guest_exit_cont /* Synthesize an ISI for the guest */ mr r11, r3 @@ -1259,12 +1268,7 @@ kvmppc_hisi: li r10, BOOK3S_INTERRUPT_INST_STORAGE li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ rotldi r11, r11, 63 -6: ld r7, VCPU_CTR(r9) - lwz r8, VCPU_XER(r9) - mtctr r7 - mtxer r8 - mr r4, r9 - b fast_guest_return + b fast_interrupt_c_return 3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */ ld r5, KVM_VRMA_SLB_V(r6) @@ -1280,14 +1284,14 @@ kvmppc_hisi: hcall_try_real_mode: ld r3,VCPU_GPR(R3)(r9) andi. r0,r11,MSR_PR - bne hcall_real_cont + bne guest_exit_cont clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table - bge hcall_real_cont + bge guest_exit_cont LOAD_REG_ADDR(r4, hcall_real_table) lwzx r3,r3,r4 cmpwi r3,0 - beq hcall_real_cont + beq guest_exit_cont add r3,r3,r4 mtctr r3 mr r3,r9 /* get vcpu pointer */ @@ -1308,7 +1312,7 @@ hcall_real_fallback: li r12,BOOK3S_INTERRUPT_SYSCALL ld r9, HSTATE_KVM_VCPU(r13) - b hcall_real_cont + b guest_exit_cont .globl hcall_real_table hcall_real_table: @@ -1567,6 +1571,21 @@ kvm_cede_exit: li r3,H_TOO_HARD blr + /* Try to handle a machine check in real mode */ +machine_check_realmode: + mr r3, r9 /* get vcpu pointer */ + bl .kvmppc_realmode_machine_check + nop + cmpdi r3, 0 /* continue exiting from guest? */ + ld r9, HSTATE_KVM_VCPU(r13) + li r12, BOOK3S_INTERRUPT_MACHINE_CHECK + beq mc_cont + /* If not, deliver a machine check. SRR0/1 are already set */ + li r10, BOOK3S_INTERRUPT_MACHINE_CHECK + li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */ + rotldi r11, r11, 63 + b fast_interrupt_c_return + secondary_too_late: ld r5,HSTATE_KVM_VCORE(r13) HMT_LOW -- cgit v1.2.3-59-g8ed1b From 910040b82de872af453bf3ecc59de8f0abd22697 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:18 +0000 Subject: KVM: PPC: e500: Silence bogus GCC warning in tlb code 64-bit GCC 4.5.1 warns about an uninitialized variable which was guarded by a flag. Initialize the variable to make it happy. Signed-off-by: Mihai Caraman [agraf: reword comment] Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 6305ee692ef7..5532bfb15464 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -415,7 +415,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, struct tlbe_ref *ref) { struct kvm_memory_slot *slot; - unsigned long pfn, hva; + unsigned long pfn = 0; /* silence GCC warning */ + unsigned long hva; int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; -- cgit v1.2.3-59-g8ed1b From b50df19cccdd169d5345b5169699446b80ee051a Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:19 +0000 Subject: KVM: PPC: booke: Fix get_tb() compile error on 64-bit Include header file for get_tb() declaration. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3d1f35dc7862..7c9c3891a14a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "timing.h" #include "booke.h" -- cgit v1.2.3-59-g8ed1b From ff594746845877c0a6402be23897df659188eacb Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:20 +0000 Subject: KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler GET_VCPU define will not be implemented for 64-bit for performance reasons so get rid of it also on 32-bit. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/bookehv_interrupts.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 099fe8272b57..fa6d5529ebfb 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -32,9 +32,6 @@ #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ -#define GET_VCPU(vcpu, thread) \ - PPC_LL vcpu, THREAD_KVM_VCPU(thread) - #define LONGBYTES (BITS_PER_LONG / 8) #define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES)) @@ -206,7 +203,7 @@ */ .macro kvm_handler intno srr0, srr1, flags _GLOBAL(kvmppc_handler_\intno\()_\srr1) - GET_VCPU(r11, r10) + PPC_LL r11, THREAD_KVM_VCPU(r10) PPC_STL r3, VCPU_GPR(R3)(r11) mfspr r3, SPRN_SPRG_RSCRATCH0 PPC_STL r4, VCPU_GPR(R4)(r11) @@ -233,7 +230,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1) .macro kvm_lvl_handler intno scratch srr0, srr1, flags _GLOBAL(kvmppc_handler_\intno\()_\srr1) mfspr r10, SPRN_SPRG_THREAD - GET_VCPU(r11, r10) + PPC_LL r11, THREAD_KVM_VCPU(r10) PPC_STL r3, VCPU_GPR(R3)(r11) mfspr r3, \scratch PPC_STL r4, VCPU_GPR(R4)(r11) -- cgit v1.2.3-59-g8ed1b From e51f8f32d6b82f4a34dbb5781769c79b813e5694 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:21 +0000 Subject: KVM: PPC: bookehv64: Add support for interrupt handling Add interrupt handling support for 64-bit bookehv hosts. Unify 32 and 64 bit implementations using a common stack layout and a common execution flow starting from kvm_handler_common macro. Update documentation for 64-bit input register values. This patch only address the bolted TLB miss exception handlers version. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_booke_hv_asm.h | 25 +++++ arch/powerpc/kvm/bookehv_interrupts.S | 138 ++++++++++++++++++++++++++-- 2 files changed, 155 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h index a37a12a9a7d7..3a79f5325712 100644 --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -17,6 +17,7 @@ * there are no exceptions for which we fall through directly to * the normal host handler. * + * 32-bit host * Expected inputs (normal exceptions): * SCRATCH0 = saved r10 * r10 = thread struct @@ -33,6 +34,30 @@ * *(r8 + GPR9) = saved r9 * *(r8 + GPR10) = saved r10 (r10 not yet clobbered) * *(r8 + GPR11) = saved r11 + * + * 64-bit host + * Expected inputs (GEN/GDBELL/DBG/MC exception types): + * r10 = saved CR + * r13 = PACA_POINTER + * *(r13 + PACA_EX##type + EX_R10) = saved r10 + * *(r13 + PACA_EX##type + EX_R11) = saved r11 + * SPRN_SPRG_##type##_SCRATCH = saved r13 + * + * Expected inputs (CRIT exception type): + * r10 = saved CR + * r13 = PACA_POINTER + * *(r13 + PACA_EX##type + EX_R10) = saved r10 + * *(r13 + PACA_EX##type + EX_R11) = saved r11 + * *(r13 + PACA_EX##type + EX_R13) = saved r13 + * + * Expected inputs (TLB exception type): + * r10 = saved CR + * r13 = PACA_POINTER + * *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10 + * *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11 + * SPRN_SPRG_GEN_SCRATCH = saved r13 + * + * Only the bolted version of TLB miss exception handlers is supported now. */ .macro DO_KVM intno srr1 #ifdef CONFIG_KVM_BOOKE_HV diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index fa6d5529ebfb..e8ed7d659c55 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -16,6 +16,7 @@ * * Author: Varun Sethi * Author: Scott Wood + * Author: Mihai Caraman * * This file is derived from arch/powerpc/kvm/booke_interrupts.S */ @@ -30,28 +31,33 @@ #include #include +#ifdef CONFIG_64BIT +#include +#else #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */ +#endif #define LONGBYTES (BITS_PER_LONG / 8) #define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES)) /* The host stack layout: */ -#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */ -#define HOST_CALLEE_LR (1 * LONGBYTES) -#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */ +#define HOST_R1 0 /* Implied by stwu. */ +#define HOST_CALLEE_LR PPC_LR_STKOFF +#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES) /* * r2 is special: it holds 'current', and it made nonvolatile in the * kernel with the -ffixed-r2 gcc option. */ -#define HOST_R2 (3 * LONGBYTES) -#define HOST_CR (4 * LONGBYTES) -#define HOST_NV_GPRS (5 * LONGBYTES) +#define HOST_R2 (HOST_RUN + LONGBYTES) +#define HOST_CR (HOST_R2 + LONGBYTES) +#define HOST_NV_GPRS (HOST_CR + LONGBYTES) #define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES)) #define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n) #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES) #define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */ -#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */ +/* LR in caller stack frame. */ +#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF) #define NEED_EMU 0x00000001 /* emulation -- save nv regs */ #define NEED_DEAR 0x00000002 /* save faulting DEAR */ @@ -198,6 +204,122 @@ b kvmppc_resume_host .endm +#ifdef CONFIG_64BIT +/* Exception types */ +#define EX_GEN 1 +#define EX_GDBELL 2 +#define EX_DBG 3 +#define EX_MC 4 +#define EX_CRIT 5 +#define EX_TLB 6 + +/* + * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h + */ +.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags + _GLOBAL(kvmppc_handler_\intno\()_\srr1) + mr r11, r4 + /* + * Get vcpu from Paca: paca->__current.thread->kvm_vcpu + */ + PPC_LL r4, PACACURRENT(r13) + PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4) + stw r10, VCPU_CR(r4) + PPC_STL r11, VCPU_GPR(R4)(r4) + PPC_STL r5, VCPU_GPR(R5)(r4) + .if \type == EX_CRIT + PPC_LL r5, (\paca_ex + EX_R13)(r13) + .else + mfspr r5, \scratch + .endif + PPC_STL r6, VCPU_GPR(R6)(r4) + PPC_STL r8, VCPU_GPR(R8)(r4) + PPC_STL r9, VCPU_GPR(R9)(r4) + PPC_STL r5, VCPU_GPR(R13)(r4) + PPC_LL r6, (\paca_ex + \ex_r10)(r13) + PPC_LL r8, (\paca_ex + \ex_r11)(r13) + PPC_STL r3, VCPU_GPR(R3)(r4) + PPC_STL r7, VCPU_GPR(R7)(r4) + PPC_STL r12, VCPU_GPR(R12)(r4) + PPC_STL r6, VCPU_GPR(R10)(r4) + PPC_STL r8, VCPU_GPR(R11)(r4) + mfctr r5 + PPC_STL r5, VCPU_CTR(r4) + mfspr r5, \srr0 + mfspr r6, \srr1 + kvm_handler_common \intno, \srr0, \flags +.endm + +#define EX_PARAMS(type) \ + EX_##type, \ + SPRN_SPRG_##type##_SCRATCH, \ + PACA_EX##type, \ + EX_R10, \ + EX_R11 + +#define EX_PARAMS_TLB \ + EX_TLB, \ + SPRN_SPRG_GEN_SCRATCH, \ + PACA_EXTLB, \ + EX_TLB_R10, \ + EX_TLB_R11 + +kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \ + SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \ + SPRN_MCSRR0, SPRN_MCSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, NEED_ESR +kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1,NEED_ESR +kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\ + SPRN_CSRR0, SPRN_CSRR1, 0 +/* + * Only bolted TLB miss exception handlers are supported for now + */ +kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \ + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) +kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \ + SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, NEED_EMU +kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \ + SPRN_GSRR0, SPRN_GSRR1, 0 +kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \ + SPRN_CSRR0, SPRN_CSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \ + SPRN_DSRR0, SPRN_DSRR1, 0 +kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \ + SPRN_CSRR0, SPRN_CSRR1, 0 +#else /* * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h */ @@ -292,7 +414,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0 kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \ SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0 - +#endif /* Registers: * SPRG_SCRATCH0: guest r10 -- cgit v1.2.3-59-g8ed1b From 7cdd7a95c66a6309ae6156471033fb5375cbcfca Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:22 +0000 Subject: KVM: PPC: e500: Add emulation helper for getting instruction ea Add emulation helper for getting instruction ea and refactor tlb instruction emulation to use it. Signed-off-by: Mihai Caraman [agraf: keep rt variable around] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 11 +++++++++++ arch/powerpc/kvm/e500.h | 6 +++--- arch/powerpc/kvm/e500_emulate.c | 14 ++++++++++---- arch/powerpc/kvm/e500_tlb.c | 33 +++++++++++---------------------- 4 files changed, 35 insertions(+), 29 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1ca31e92ee75..d55a2b28706e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -295,4 +295,15 @@ static inline void kvmppc_lazy_ee_enable(void) #endif } +static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) +{ + ulong ea; + + ea = kvmppc_get_gpr(vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(vcpu, ra); + + return ea; +} + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index d1622864549e..32e98a72b0ac 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -129,9 +129,9 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value); int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu); int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu); -int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb); -int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb); -int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb); +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea); +int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea); +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea); int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500); void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e04b0ef55ce0..e78f353a836a 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -89,6 +89,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, int ra = get_ra(inst); int rb = get_rb(inst); int rt = get_rt(inst); + gva_t ea; switch (get_op(inst)) { case 31: @@ -113,15 +114,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case XOP_TLBSX: - emulated = kvmppc_e500_emul_tlbsx(vcpu,rb); + ea = kvmppc_get_ea_indexed(vcpu, ra, rb); + emulated = kvmppc_e500_emul_tlbsx(vcpu, ea); break; - case XOP_TLBILX: - emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb); + case XOP_TLBILX: { + int type = rt & 0x3; + ea = kvmppc_get_ea_indexed(vcpu, ra, rb); + emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea); break; + } case XOP_TLBIVAX: - emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb); + ea = kvmppc_get_ea_indexed(vcpu, ra, rb); + emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; default: diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 5532bfb15464..7a1472163120 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -689,14 +689,11 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) return EMULATE_DONE; } -int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) +int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); unsigned int ia; int esel, tlbsel; - gva_t ea; - - ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb); ia = (ea >> 2) & 0x1; @@ -723,7 +720,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb) } static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, - int pid, int rt) + int pid, int type) { struct kvm_book3e_206_tlb_entry *tlbe; int tid, esel; @@ -732,7 +729,7 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) { tlbe = get_entry(vcpu_e500, tlbsel, esel); tid = get_tlb_tid(tlbe); - if (rt == 0 || tid == pid) { + if (type == 0 || tid == pid) { inval_gtlbe_on_host(vcpu_e500, tlbsel, esel); kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } @@ -740,14 +737,9 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, } static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, - int ra, int rb) + gva_t ea) { int tlbsel, esel; - gva_t ea; - - ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra); for (tlbsel = 0; tlbsel < 2; tlbsel++) { esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1); @@ -759,16 +751,16 @@ static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid, } } -int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb) +int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int pid = get_cur_spid(vcpu); - if (rt == 0 || rt == 1) { - tlbilx_all(vcpu_e500, 0, pid, rt); - tlbilx_all(vcpu_e500, 1, pid, rt); - } else if (rt == 3) { - tlbilx_one(vcpu_e500, pid, ra, rb); + if (type == 0 || type == 1) { + tlbilx_all(vcpu_e500, 0, pid, type); + tlbilx_all(vcpu_e500, 1, pid, type); + } else if (type == 3) { + tlbilx_one(vcpu_e500, pid, ea); } return EMULATE_DONE; @@ -793,16 +785,13 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu) return EMULATE_DONE; } -int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb) +int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); int as = !!get_cur_sas(vcpu); unsigned int pid = get_cur_spid(vcpu); int esel, tlbsel; struct kvm_book3e_206_tlb_entry *gtlbe = NULL; - gva_t ea; - - ea = kvmppc_get_gpr(vcpu, rb); for (tlbsel = 0; tlbsel < 2; tlbsel++) { esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as); -- cgit v1.2.3-59-g8ed1b From 8823a8fd0d730612f12a87102503622c01eb2468 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:23 +0000 Subject: KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation Mask high 32 bits of effective address in emulation layer for guests running in 32-bit mode. Signed-off-by: Mihai Caraman [agraf: fix indent] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_ppc.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index d55a2b28706e..572aa7530619 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -298,11 +298,21 @@ static inline void kvmppc_lazy_ee_enable(void) static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) { ulong ea; + ulong msr_64bit = 0; ea = kvmppc_get_gpr(vcpu, rb); if (ra) ea += kvmppc_get_gpr(vcpu, ra); +#if defined(CONFIG_PPC_BOOK3E_64) + msr_64bit = MSR_CM; +#elif defined(CONFIG_PPC_BOOK3S_64) + msr_64bit = MSR_SF; +#endif + + if (!(vcpu->arch.shared->msr & msr_64bit)) + ea = (uint32_t)ea; + return ea; } -- cgit v1.2.3-59-g8ed1b From 9e2fa646936160eca525bcb80c2cce05faa9b208 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:24 +0000 Subject: KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation Mask high 32 bits of MAS2's effective page number in tlbwe emulation for guests running in 32-bit mode. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 7a1472163120..cf3f18012371 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -871,6 +871,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) gtlbe->mas1 = vcpu->arch.shared->mas1; gtlbe->mas2 = vcpu->arch.shared->mas2; + if (!(vcpu->arch.shared->msr & MSR_CM)) + gtlbe->mas2 &= 0xffffffffUL; gtlbe->mas7_3 = vcpu->arch.shared->mas7_3; trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1, -- cgit v1.2.3-59-g8ed1b From e9666ea1b3d11509b76f8ff5b9776d8d30709b19 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:25 +0000 Subject: KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit Extend MAS2 EPN mask to retain most significant bits on 64-bit hosts. Use this mask in tlb effective address accessor. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/mmu-book3e.h | 2 +- arch/powerpc/kvm/e500.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index eeabcdbc30f7..99d43e0c1e4a 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -59,7 +59,7 @@ #define MAS1_TSIZE_SHIFT 7 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) -#define MAS2_EPN 0xFFFFF000 +#define MAS2_EPN (~0xFFFUL) #define MAS2_X0 0x00000040 #define MAS2_X1 0x00000020 #define MAS2_W 0x00000010 diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 32e98a72b0ac..c70d37ed770a 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -154,7 +154,7 @@ get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe) static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe) { - return tlbe->mas2 & 0xfffff000; + return tlbe->mas2 & MAS2_EPN; } static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe) -- cgit v1.2.3-59-g8ed1b From 62b4db0042aa753810e0d4f184481cc107c925ba Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sat, 1 Dec 2012 14:50:26 +0100 Subject: KVM: PPC: Make EPCR a valid field for booke64 and bookehv In BookE, EPCR is defined and valid when either the HV or the 64bit category are implemented. Reflect this in the field definition. Today the only KVM target on 64bit is HV enabled, so there is no change in actual source code, but this keeps the code closer to the spec and doesn't build up artificial road blocks for a PR KVM on 64bit. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 62fbd38b15fa..ca9bf459db6a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -406,13 +406,18 @@ struct kvm_vcpu_arch { u32 host_mas4; u32 host_mas6; u32 shadow_epcr; - u32 epcr; u32 shadow_msrp; u32 eplc; u32 epsc; u32 oldpir; #endif +#if defined(CONFIG_BOOKE) +#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT) + u32 epcr; +#endif +#endif + #ifdef CONFIG_PPC_BOOK3S /* For Gekko paired singles */ u32 qpr[32]; -- cgit v1.2.3-59-g8ed1b From 95e90b43c9c648bde607101e5a158941eec8e514 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:26 +0000 Subject: KVM: PPC: bookehv: Add guest computation mode for irq delivery When delivering guest IRQs, update MSR computation mode according to guest interrupt computation mode found in EPCR. Signed-off-by: Mihai Caraman [agraf: remove HV dependency in the code] Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 7c9c3891a14a..9457fb1b41c9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -312,6 +312,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, bool crit; bool keep_irq = false; enum int_class int_class; + ulong new_msr = vcpu->arch.shared->msr; /* Truncate crit indicators in 32 bit mode */ if (!(vcpu->arch.shared->msr & MSR_SF)) { @@ -407,7 +408,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); - kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); + + new_msr &= msr_mask; +#if defined(CONFIG_64BIT) + if (vcpu->arch.epcr & SPRN_EPCR_ICM) + new_msr |= MSR_CM; +#endif + kvmppc_set_msr(vcpu, new_msr); if (!keep_irq) clear_bit(priority, &vcpu->arch.pending_exceptions); -- cgit v1.2.3-59-g8ed1b From 38f988240c611f9d2595feb1b8ddcb80b0e97dec Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:27 +0000 Subject: KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation Add EPCR support in booke mtspr/mfspr emulation. EPCR register is defined only for 64-bit and HV categories, we will expose it at this point only to 64-bit virtual processors running on 64-bit HV hosts. Define a reusable setter function for vcpu's EPCR. Signed-off-by: Mihai Caraman [agraf: move HV dependency in the code] Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 12 ++++++++++++ arch/powerpc/kvm/booke.h | 1 + arch/powerpc/kvm/booke_emulate.c | 14 +++++++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 9457fb1b41c9..037d045db3f1 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1473,6 +1473,18 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { } +void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr) +{ +#if defined(CONFIG_64BIT) + vcpu->arch.epcr = new_epcr; +#ifdef CONFIG_KVM_BOOKE_HV + vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM; + if (vcpu->arch.epcr & SPRN_EPCR_ICM) + vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM; +#endif +#endif +} + void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) { vcpu->arch.tcr = new_tcr; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index ba61974c1e20..e9b88e433f64 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -69,6 +69,7 @@ extern unsigned long kvmppc_booke_handlers; void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); +void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr); void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr); void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 514790f41aba..4685b8cf2249 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -240,7 +240,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_MCSR: vcpu->arch.mcsr &= ~spr_val; break; - +#if defined(CONFIG_64BIT) + case SPRN_EPCR: + kvmppc_set_epcr(vcpu, spr_val); +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr); +#endif + break; +#endif default: emulated = EMULATE_FAIL; } @@ -335,6 +342,11 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_MCSR: *spr_val = vcpu->arch.mcsr; break; +#if defined(CONFIG_64BIT) + case SPRN_EPCR: + *spr_val = vcpu->arch.epcr; + break; +#endif default: emulated = EMULATE_FAIL; -- cgit v1.2.3-59-g8ed1b From 352df1deb2e3c40e65ff94c8d7c62d9144446b1c Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Thu, 11 Oct 2012 06:13:29 +0000 Subject: KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface Implement ONE_REG interface for EPCR register adding KVM_REG_PPC_EPCR to the list of ONE_REG PPC supported registers. Signed-off-by: Mihai Caraman [agraf: remove HV dependency, use get/put_user] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 1 + arch/powerpc/include/uapi/asm/kvm.h | 2 ++ arch/powerpc/kvm/booke.c | 14 ++++++++++++++ 3 files changed, 17 insertions(+) (limited to 'arch/powerpc') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a5607c571cb3..a4df5535996b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1773,6 +1773,7 @@ registers, find a list below: PPC | KVM_REG_PPC_VPA_ADDR | 64 PPC | KVM_REG_PPC_VPA_SLB | 128 PPC | KVM_REG_PPC_VPA_DTL | 128 + PPC | KVM_REG_PPC_EPCR | 32 4.69 KVM_GET_ONE_REG diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 514883dd311e..2fba8a66fb10 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -411,4 +411,6 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83) #define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84) +#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85) + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 037d045db3f1..69f114015780 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1388,6 +1388,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); break; } +#if defined(CONFIG_64BIT) + case KVM_REG_PPC_EPCR: + r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); + break; +#endif default: break; } @@ -1415,6 +1420,15 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) (u64 __user *)(long)reg->addr, sizeof(u64)); break; } +#if defined(CONFIG_64BIT) + case KVM_REG_PPC_EPCR: { + u32 new_epcr; + r = get_user(new_epcr, (u32 __user *)(long)reg->addr); + if (r == 0) + kvmppc_set_epcr(vcpu, new_epcr); + break; + } +#endif default: break; } -- cgit v1.2.3-59-g8ed1b