diff options
Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r-- | arch/riscv/kvm/Kconfig | 3 | ||||
-rw-r--r-- | arch/riscv/kvm/Makefile | 30 | ||||
-rw-r--r-- | arch/riscv/kvm/aia.c | 154 | ||||
-rw-r--r-- | arch/riscv/kvm/aia_aplic.c | 40 | ||||
-rw-r--r-- | arch/riscv/kvm/aia_device.c | 47 | ||||
-rw-r--r-- | arch/riscv/kvm/aia_imsic.c | 11 | ||||
-rw-r--r-- | arch/riscv/kvm/main.c | 87 | ||||
-rw-r--r-- | arch/riscv/kvm/mmu.c | 33 | ||||
-rw-r--r-- | arch/riscv/kvm/nacl.c | 152 | ||||
-rw-r--r-- | arch/riscv/kvm/tlb.c | 57 | ||||
-rw-r--r-- | arch/riscv/kvm/trace.h | 67 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu.c | 345 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_exit.c | 43 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_insn.c | 15 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_onereg.c | 55 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_pmu.c | 253 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi.c | 54 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_hsm.c | 58 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_pmu.c | 17 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_replace.c | 23 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_sta.c | 4 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_sbi_system.c | 66 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_switch.S | 137 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_timer.c | 35 | ||||
-rw-r--r-- | arch/riscv/kvm/vcpu_vector.c | 13 | ||||
-rw-r--r-- | arch/riscv/kvm/vm.c | 14 |
26 files changed, 1401 insertions, 412 deletions
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index 26d1727f0550..704c2899197e 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -18,7 +18,7 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION config KVM - tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" + tristate "Kernel-based Virtual Machine (KVM) support" depends on RISCV_SBI && MMU select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING @@ -32,6 +32,7 @@ config KVM select KVM_XFER_TO_GUEST_WORK select KVM_GENERIC_MMU_NOTIFIER select SCHED_INFO + select GUEST_PERF_EVENTS if PERF_EVENTS help Support hosting virtualized guest machines. diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index c9646521f113..4e0bba91d284 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -3,33 +3,37 @@ # Makefile for RISC-V KVM support # -ccflags-y += -I $(srctree)/$(src) +ccflags-y += -I $(src) include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o +# Ordered alphabetically +kvm-y += aia.o +kvm-y += aia_aplic.o +kvm-y += aia_device.o +kvm-y += aia_imsic.o kvm-y += main.o -kvm-y += vm.o -kvm-y += vmid.o -kvm-y += tlb.o kvm-y += mmu.o +kvm-y += nacl.o +kvm-y += tlb.o kvm-y += vcpu.o kvm-y += vcpu_exit.o kvm-y += vcpu_fp.o -kvm-y += vcpu_vector.o kvm-y += vcpu_insn.o kvm-y += vcpu_onereg.o -kvm-y += vcpu_switch.o +kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o kvm-y += vcpu_sbi.o -kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o kvm-y += vcpu_sbi_base.o -kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_hsm.o +kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o +kvm-y += vcpu_sbi_replace.o kvm-y += vcpu_sbi_sta.o +kvm-y += vcpu_sbi_system.o +kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o +kvm-y += vcpu_switch.o kvm-y += vcpu_timer.o -kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o -kvm-y += aia.o -kvm-y += aia_device.o -kvm-y += aia_aplic.o -kvm-y += aia_imsic.o +kvm-y += vcpu_vector.o +kvm-y += vm.o +kvm-y += vmid.o diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index a944294f6f23..19afd1f23537 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -10,12 +10,13 @@ #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/irq.h> +#include <linux/irqchip/riscv-imsic.h> #include <linux/irqdomain.h> #include <linux/kvm_host.h> #include <linux/percpu.h> #include <linux/spinlock.h> #include <asm/cpufeature.h> -#include <asm/kvm_aia_imsic.h> +#include <asm/kvm_nacl.h> struct aia_hgei_control { raw_spinlock_t lock; @@ -51,7 +52,7 @@ static int aia_find_hgei(struct kvm_vcpu *owner) return hgei; } -static void aia_set_hvictl(bool ext_irq_pending) +static inline unsigned long aia_hvictl_value(bool ext_irq_pending) { unsigned long hvictl; @@ -62,7 +63,7 @@ static void aia_set_hvictl(bool ext_irq_pending) hvictl = (IRQ_S_EXT << HVICTL_IID_SHIFT) & HVICTL_IID; hvictl |= ext_irq_pending; - csr_write(CSR_HVICTL, hvictl); + return hvictl; } #ifdef CONFIG_32BIT @@ -88,7 +89,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; if (kvm_riscv_aia_available()) - csr->vsieh = csr_read(CSR_VSIEH); + csr->vsieh = ncsr_read(CSR_VSIEH); } #endif @@ -115,7 +116,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) hgei = aia_find_hgei(vcpu); if (hgei > 0) - return !!(csr_read(CSR_HGEIP) & BIT(hgei)); + return !!(ncsr_read(CSR_HGEIP) & BIT(hgei)); return false; } @@ -128,45 +129,73 @@ void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) return; #ifdef CONFIG_32BIT - csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); + ncsr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); #endif - aia_set_hvictl(!!(csr->hvip & BIT(IRQ_VS_EXT))); + ncsr_write(CSR_HVICTL, aia_hvictl_value(!!(csr->hvip & BIT(IRQ_VS_EXT)))); } void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + void *nsh; if (!kvm_riscv_aia_available()) return; - csr_write(CSR_VSISELECT, csr->vsiselect); - csr_write(CSR_HVIPRIO1, csr->hviprio1); - csr_write(CSR_HVIPRIO2, csr->hviprio2); + if (kvm_riscv_nacl_sync_csr_available()) { + nsh = nacl_shmem(); + nacl_csr_write(nsh, CSR_VSISELECT, csr->vsiselect); + nacl_csr_write(nsh, CSR_HVIPRIO1, csr->hviprio1); + nacl_csr_write(nsh, CSR_HVIPRIO2, csr->hviprio2); +#ifdef CONFIG_32BIT + nacl_csr_write(nsh, CSR_VSIEH, csr->vsieh); + nacl_csr_write(nsh, CSR_HVIPH, csr->hviph); + nacl_csr_write(nsh, CSR_HVIPRIO1H, csr->hviprio1h); + nacl_csr_write(nsh, CSR_HVIPRIO2H, csr->hviprio2h); +#endif + } else { + csr_write(CSR_VSISELECT, csr->vsiselect); + csr_write(CSR_HVIPRIO1, csr->hviprio1); + csr_write(CSR_HVIPRIO2, csr->hviprio2); #ifdef CONFIG_32BIT - csr_write(CSR_VSIEH, csr->vsieh); - csr_write(CSR_HVIPH, csr->hviph); - csr_write(CSR_HVIPRIO1H, csr->hviprio1h); - csr_write(CSR_HVIPRIO2H, csr->hviprio2h); + csr_write(CSR_VSIEH, csr->vsieh); + csr_write(CSR_HVIPH, csr->hviph); + csr_write(CSR_HVIPRIO1H, csr->hviprio1h); + csr_write(CSR_HVIPRIO2H, csr->hviprio2h); #endif + } } void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; + void *nsh; if (!kvm_riscv_aia_available()) return; - csr->vsiselect = csr_read(CSR_VSISELECT); - csr->hviprio1 = csr_read(CSR_HVIPRIO1); - csr->hviprio2 = csr_read(CSR_HVIPRIO2); + if (kvm_riscv_nacl_available()) { + nsh = nacl_shmem(); + csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT); + csr->hviprio1 = nacl_csr_read(nsh, CSR_HVIPRIO1); + csr->hviprio2 = nacl_csr_read(nsh, CSR_HVIPRIO2); #ifdef CONFIG_32BIT - csr->vsieh = csr_read(CSR_VSIEH); - csr->hviph = csr_read(CSR_HVIPH); - csr->hviprio1h = csr_read(CSR_HVIPRIO1H); - csr->hviprio2h = csr_read(CSR_HVIPRIO2H); + csr->vsieh = nacl_csr_read(nsh, CSR_VSIEH); + csr->hviph = nacl_csr_read(nsh, CSR_HVIPH); + csr->hviprio1h = nacl_csr_read(nsh, CSR_HVIPRIO1H); + csr->hviprio2h = nacl_csr_read(nsh, CSR_HVIPRIO2H); +#endif + } else { + csr->vsiselect = csr_read(CSR_VSISELECT); + csr->hviprio1 = csr_read(CSR_HVIPRIO1); + csr->hviprio2 = csr_read(CSR_HVIPRIO2); +#ifdef CONFIG_32BIT + csr->vsieh = csr_read(CSR_VSIEH); + csr->hviph = csr_read(CSR_HVIPH); + csr->hviprio1h = csr_read(CSR_HVIPRIO1H); + csr->hviprio2h = csr_read(CSR_HVIPRIO2H); #endif + } } int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, @@ -250,20 +279,20 @@ static u8 aia_get_iprio8(struct kvm_vcpu *vcpu, unsigned int irq) switch (bitpos / BITS_PER_LONG) { case 0: - hviprio = csr_read(CSR_HVIPRIO1); + hviprio = ncsr_read(CSR_HVIPRIO1); break; case 1: #ifndef CONFIG_32BIT - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; #else - hviprio = csr_read(CSR_HVIPRIO1H); + hviprio = ncsr_read(CSR_HVIPRIO1H); break; case 2: - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; case 3: - hviprio = csr_read(CSR_HVIPRIO2H); + hviprio = ncsr_read(CSR_HVIPRIO2H); break; #endif default: @@ -283,20 +312,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) switch (bitpos / BITS_PER_LONG) { case 0: - hviprio = csr_read(CSR_HVIPRIO1); + hviprio = ncsr_read(CSR_HVIPRIO1); break; case 1: #ifndef CONFIG_32BIT - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; #else - hviprio = csr_read(CSR_HVIPRIO1H); + hviprio = ncsr_read(CSR_HVIPRIO1H); break; case 2: - hviprio = csr_read(CSR_HVIPRIO2); + hviprio = ncsr_read(CSR_HVIPRIO2); break; case 3: - hviprio = csr_read(CSR_HVIPRIO2H); + hviprio = ncsr_read(CSR_HVIPRIO2H); break; #endif default: @@ -308,20 +337,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) switch (bitpos / BITS_PER_LONG) { case 0: - csr_write(CSR_HVIPRIO1, hviprio); + ncsr_write(CSR_HVIPRIO1, hviprio); break; case 1: #ifndef CONFIG_32BIT - csr_write(CSR_HVIPRIO2, hviprio); + ncsr_write(CSR_HVIPRIO2, hviprio); break; #else - csr_write(CSR_HVIPRIO1H, hviprio); + ncsr_write(CSR_HVIPRIO1H, hviprio); break; case 2: - csr_write(CSR_HVIPRIO2, hviprio); + ncsr_write(CSR_HVIPRIO2, hviprio); break; case 3: - csr_write(CSR_HVIPRIO2H, hviprio); + ncsr_write(CSR_HVIPRIO2H, hviprio); break; #endif default: @@ -377,7 +406,7 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, return KVM_INSN_ILLEGAL_TRAP; /* First try to emulate in kernel space */ - isel = csr_read(CSR_VSISELECT) & ISELECT_MASK; + isel = ncsr_read(CSR_VSISELECT) & ISELECT_MASK; if (isel >= ISELECT_IPRIO0 && isel <= ISELECT_IPRIO15) return aia_rmw_iprio(vcpu, isel, val, new_val, wr_mask); else if (isel >= IMSIC_FIRST && isel <= IMSIC_LAST && @@ -394,6 +423,8 @@ int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, { int ret = -ENOENT; unsigned long flags; + const struct imsic_global_config *gc; + const struct imsic_local_config *lc; struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu); if (!kvm_riscv_aia_available() || !hgctrl) @@ -409,11 +440,14 @@ int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, raw_spin_unlock_irqrestore(&hgctrl->lock, flags); - /* TODO: To be updated later by AIA IMSIC HW guest file support */ - if (hgei_va) - *hgei_va = NULL; - if (hgei_pa) - *hgei_pa = 0; + gc = imsic_get_global_config(); + lc = (gc) ? per_cpu_ptr(gc->local, cpu) : NULL; + if (lc && ret > 0) { + if (hgei_va) + *hgei_va = lc->msi_va + (ret * IMSIC_MMIO_PAGE_SZ); + if (hgei_pa) + *hgei_pa = lc->msi_pa + (ret * IMSIC_MMIO_PAGE_SZ); + } return ret; } @@ -494,6 +528,10 @@ static int aia_hgei_init(void) hgctrl->free_bitmap = 0; } + /* Skip SGEI interrupt setup for zero guest external interrupts */ + if (!kvm_riscv_aia_nr_hgei) + goto skip_sgei_interrupt; + /* Find INTC irq domain */ domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY); @@ -517,11 +555,16 @@ static int aia_hgei_init(void) return rc; } +skip_sgei_interrupt: return 0; } static void aia_hgei_exit(void) { + /* Do nothing for zero guest external interrupts */ + if (!kvm_riscv_aia_nr_hgei) + return; + /* Free per-CPU SGEI interrupt */ free_percpu_irq(hgei_parent_irq, &aia_hgei); } @@ -531,7 +574,7 @@ void kvm_riscv_aia_enable(void) if (!kvm_riscv_aia_available()) return; - aia_set_hvictl(false); + csr_write(CSR_HVICTL, aia_hvictl_value(false)); csr_write(CSR_HVIPRIO1, 0x0); csr_write(CSR_HVIPRIO2, 0x0); #ifdef CONFIG_32BIT @@ -545,6 +588,9 @@ void kvm_riscv_aia_enable(void) enable_percpu_irq(hgei_parent_irq, irq_get_trigger_type(hgei_parent_irq)); csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); + /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) @@ -558,11 +604,13 @@ void kvm_riscv_aia_disable(void) return; hgctrl = get_cpu_ptr(&aia_hgei); + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF)); /* Disable per-CPU SGEI interrupt */ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); disable_percpu_irq(hgei_parent_irq); - aia_set_hvictl(false); + csr_write(CSR_HVICTL, aia_hvictl_value(false)); raw_spin_lock_irqsave(&hgctrl->lock, flags); @@ -600,9 +648,11 @@ void kvm_riscv_aia_disable(void) int kvm_riscv_aia_init(void) { int rc; + const struct imsic_global_config *gc; if (!riscv_isa_extension_available(NULL, SxAIA)) return -ENODEV; + gc = imsic_get_global_config(); /* Figure-out number of bits in HGEIE */ csr_write(CSR_HGEIE, -1UL); @@ -614,17 +664,17 @@ int kvm_riscv_aia_init(void) /* * Number of usable HGEI lines should be minimum of per-HART * IMSIC guest files and number of bits in HGEIE - * - * TODO: To be updated later by AIA IMSIC HW guest file support */ - kvm_riscv_aia_nr_hgei = 0; + if (gc) + kvm_riscv_aia_nr_hgei = min((ulong)kvm_riscv_aia_nr_hgei, + BIT(gc->guest_index_bits) - 1); + else + kvm_riscv_aia_nr_hgei = 0; - /* - * Find number of guest MSI IDs - * - * TODO: To be updated later by AIA IMSIC HW guest file support - */ + /* Find number of guest MSI IDs */ kvm_riscv_aia_max_ids = IMSIC_MAX_ID; + if (gc && kvm_riscv_aia_nr_hgei) + kvm_riscv_aia_max_ids = gc->nr_guest_ids + 1; /* Initialize guest external interrupt line management */ rc = aia_hgei_init(); diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c index 39e72aa016a4..f59d1c0c8c43 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c @@ -7,12 +7,12 @@ * Anup Patel <apatel@ventanamicro.com> */ +#include <linux/irqchip/riscv-aplic.h> #include <linux/kvm_host.h> #include <linux/math.h> #include <linux/spinlock.h> #include <linux/swab.h> #include <kvm/iodev.h> -#include <asm/kvm_aia_aplic.h> struct aplic_irq { raw_spinlock_t lock; @@ -137,11 +137,22 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) raw_spin_lock_irqsave(&irqd->lock, flags); sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK; - if (!pending && - ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) || - (sm == APLIC_SOURCECFG_SM_LEVEL_LOW))) + if (sm == APLIC_SOURCECFG_SM_INACTIVE) goto skip_write_pending; + if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { + if (!pending) + goto noskip_write_pending; + if ((irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) + goto skip_write_pending; + if (!(irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) + goto skip_write_pending; + } + +noskip_write_pending: if (pending) irqd->state |= APLIC_IRQ_STATE_PENDING; else @@ -187,16 +198,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) static bool aplic_read_input(struct aplic *aplic, u32 irq) { - bool ret; - unsigned long flags; + u32 sourcecfg, sm, raw_input, irq_inverted; struct aplic_irq *irqd; + unsigned long flags; + bool ret = false; if (!irq || aplic->nr_irqs <= irq) return false; irqd = &aplic->irqs[irq]; raw_spin_lock_irqsave(&irqd->lock, flags); - ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false; + + sourcecfg = irqd->sourcecfg; + if (sourcecfg & APLIC_SOURCECFG_D) + goto skip; + + sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) + goto skip; + + raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0; + irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || + sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0; + ret = !!(raw_input ^ irq_inverted); + +skip: raw_spin_unlock_irqrestore(&irqd->lock, flags); return ret; diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index 0eb689351b7d..806c41931cde 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c @@ -8,39 +8,9 @@ */ #include <linux/bits.h> +#include <linux/irqchip/riscv-imsic.h> #include <linux/kvm_host.h> #include <linux/uaccess.h> -#include <asm/kvm_aia_imsic.h> - -static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) -{ - struct kvm_vcpu *tmp_vcpu; - - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } -} - -static void unlock_all_vcpus(struct kvm *kvm) -{ - unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); -} - -static bool lock_all_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *tmp_vcpu; - unsigned long c; - - kvm_for_each_vcpu(c, tmp_vcpu, kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) { - unlock_vcpus(kvm, c - 1); - return false; - } - } - - return true; -} static int aia_create(struct kvm_device *dev, u32 type) { @@ -53,7 +23,7 @@ static int aia_create(struct kvm_device *dev, u32 type) return -EEXIST; ret = -EBUSY; - if (!lock_all_vcpus(kvm)) + if (kvm_trylock_all_vcpus(kvm)) return ret; kvm_for_each_vcpu(i, vcpu, kvm) { @@ -65,7 +35,7 @@ static int aia_create(struct kvm_device *dev, u32 type) kvm->arch.aia.in_kernel = true; out_unlock: - unlock_all_vcpus(kvm); + kvm_unlock_all_vcpus(kvm); return ret; } @@ -237,10 +207,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr) static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr) { - u32 hart, group = 0; + u32 hart = 0, group = 0; - hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & - GENMASK_ULL(aia->nr_hart_bits - 1, 0); + if (aia->nr_hart_bits) + hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & + GENMASK_ULL(aia->nr_hart_bits - 1, 0); if (aia->nr_group_bits) group = (addr >> aia->nr_group_shift) & GENMASK_ULL(aia->nr_group_bits - 1, 0); @@ -525,12 +496,10 @@ int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) { struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; - struct kvm_vcpu_aia_csr *reset_csr = - &vcpu->arch.aia_context.guest_reset_csr; if (!kvm_riscv_aia_available()) return; - memcpy(csr, reset_csr, sizeof(*csr)); + memset(csr, 0, sizeof(*csr)); /* Proceed only if AIA was initialized successfully */ if (!kvm_riscv_aia_initialized(vcpu->kvm)) diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index e808723a85f1..29ef9c2133a9 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -9,13 +9,13 @@ #include <linux/atomic.h> #include <linux/bitmap.h> +#include <linux/irqchip/riscv-imsic.h> #include <linux/kvm_host.h> #include <linux/math.h> #include <linux/spinlock.h> #include <linux/swab.h> #include <kvm/iodev.h> #include <asm/csr.h> -#include <asm/kvm_aia_imsic.h> #define IMSIC_MAX_EIX (IMSIC_MAX_ID / BITS_PER_TYPE(u64)) @@ -55,7 +55,7 @@ struct imsic { /* IMSIC SW-file */ struct imsic_mrif *swfile; phys_addr_t swfile_pa; - spinlock_t swfile_extirq_lock; + raw_spinlock_t swfile_extirq_lock; }; #define imsic_vs_csr_read(__c) \ @@ -622,7 +622,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu) * interruptions between reading topei and updating pending status. */ - spin_lock_irqsave(&imsic->swfile_extirq_lock, flags); + raw_spin_lock_irqsave(&imsic->swfile_extirq_lock, flags); if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) && imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis)) @@ -630,7 +630,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu) else kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT); - spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags); + raw_spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags); } static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear, @@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, if (imsic->vsfile_cpu >= 0) { writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE); - kvm_vcpu_kick(vcpu); } else { eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)]; set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip); @@ -1051,7 +1050,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) } imsic->swfile = page_to_virt(swfile_page); imsic->swfile_pa = page_to_phys(swfile_page); - spin_lock_init(&imsic->swfile_extirq_lock); + raw_spin_lock_init(&imsic->swfile_extirq_lock); /* Setup IO device */ kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 225a435d9c9a..4b24705dc63a 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -10,8 +10,8 @@ #include <linux/err.h> #include <linux/module.h> #include <linux/kvm_host.h> -#include <asm/csr.h> #include <asm/cpufeature.h> +#include <asm/kvm_nacl.h> #include <asm/sbi.h> long kvm_arch_dev_ioctl(struct file *filp, @@ -20,24 +20,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_arch_hardware_enable(void) +int kvm_arch_enable_virtualization_cpu(void) { - unsigned long hideleg, hedeleg; - - hedeleg = 0; - hedeleg |= (1UL << EXC_INST_MISALIGNED); - hedeleg |= (1UL << EXC_BREAKPOINT); - hedeleg |= (1UL << EXC_SYSCALL); - hedeleg |= (1UL << EXC_INST_PAGE_FAULT); - hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT); - hedeleg |= (1UL << EXC_STORE_PAGE_FAULT); - csr_write(CSR_HEDELEG, hedeleg); - - hideleg = 0; - hideleg |= (1UL << IRQ_VS_SOFT); - hideleg |= (1UL << IRQ_VS_TIMER); - hideleg |= (1UL << IRQ_VS_EXT); - csr_write(CSR_HIDELEG, hideleg); + int rc; + + rc = kvm_riscv_nacl_enable(); + if (rc) + return rc; + + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); + csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); /* VS should access only the time counter directly. Everything else should trap */ csr_write(CSR_HCOUNTEREN, 0x02); @@ -49,7 +41,7 @@ int kvm_arch_hardware_enable(void) return 0; } -void kvm_arch_hardware_disable(void) +void kvm_arch_disable_virtualization_cpu(void) { kvm_riscv_aia_disable(); @@ -63,11 +55,21 @@ void kvm_arch_hardware_disable(void) csr_write(CSR_HVIP, 0); csr_write(CSR_HEDELEG, 0); csr_write(CSR_HIDELEG, 0); + + kvm_riscv_nacl_disable(); +} + +static void kvm_riscv_teardown(void) +{ + kvm_riscv_aia_exit(); + kvm_riscv_nacl_exit(); + kvm_unregister_perf_callbacks(); } static int __init riscv_kvm_init(void) { int rc; + char slist[64]; const char *str; if (!riscv_isa_extension_available(NULL, h)) { @@ -85,16 +87,53 @@ static int __init riscv_kvm_init(void) return -ENODEV; } + rc = kvm_riscv_nacl_init(); + if (rc && rc != -ENODEV) + return rc; + kvm_riscv_gstage_mode_detect(); kvm_riscv_gstage_vmid_detect(); rc = kvm_riscv_aia_init(); - if (rc && rc != -ENODEV) + if (rc && rc != -ENODEV) { + kvm_riscv_nacl_exit(); return rc; + } kvm_info("hypervisor extension available\n"); + if (kvm_riscv_nacl_available()) { + rc = 0; + slist[0] = '\0'; + if (kvm_riscv_nacl_sync_csr_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_csr"); + rc++; + } + if (kvm_riscv_nacl_sync_hfence_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_hfence"); + rc++; + } + if (kvm_riscv_nacl_sync_sret_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "sync_sret"); + rc++; + } + if (kvm_riscv_nacl_autoswap_csr_available()) { + if (rc) + strcat(slist, ", "); + strcat(slist, "autoswap_csr"); + rc++; + } + kvm_info("using SBI nested acceleration with %s\n", + (rc) ? slist : "no features"); + } + switch (kvm_riscv_gstage_mode()) { case HGATP_MODE_SV32X4: str = "Sv32x4"; @@ -119,9 +158,11 @@ static int __init riscv_kvm_init(void) kvm_info("AIA available with %d guest external interrupts\n", kvm_riscv_aia_nr_hgei); + kvm_register_perf_callbacks(NULL); + rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); if (rc) { - kvm_riscv_aia_exit(); + kvm_riscv_teardown(); return rc; } @@ -131,8 +172,8 @@ module_init(riscv_kvm_init); static void __exit riscv_kvm_exit(void) { - kvm_riscv_aia_exit(); - kvm_exit(); + + kvm_riscv_teardown(); } module_exit(riscv_kvm_exit); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index a9e2fd7245e1..1087ea74567b 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -15,7 +15,7 @@ #include <linux/vmalloc.h> #include <linux/kvm_host.h> #include <linux/sched/signal.h> -#include <asm/csr.h> +#include <asm/kvm_nacl.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - int ret; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - - if (!kvm->arch.pgd) - return false; - - WARN_ON(range->end - range->start != 1); - - ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT, - __pfn_to_phys(pfn), PAGE_SIZE, true, true); - if (ret) { - kvm_debug("Failed to map G-stage page (error %d)\n", ret); - return true; - } - - return false; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { pte_t *ptep; @@ -621,6 +601,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, bool logging = (memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY)) ? true : false; unsigned long vma_pagesize, mmu_seq; + struct page *page; /* We need minimum second+third level pages */ ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); @@ -651,7 +632,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, /* * Read mmu_invalidate_seq so that KVM can detect if the results of - * vma_lookup() or gfn_to_pfn_prot() become stale priort to acquiring + * vma_lookup() or __kvm_faultin_pfn() become stale prior to acquiring * kvm->mmu_lock. * * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs @@ -667,7 +648,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, return -EFAULT; } - hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); + hfn = kvm_faultin_pfn(vcpu, gfn, is_write, &writable, &page); if (hfn == KVM_PFN_ERR_HWPOISON) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva, vma_pageshift, current); @@ -689,7 +670,6 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, goto out_unlock; if (writable) { - kvm_set_pfn_dirty(hfn); mark_page_dirty(kvm, gfn); ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT, vma_pagesize, false, true); @@ -702,9 +682,8 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, kvm_err("Failed to map in G-stage\n"); out_unlock: + kvm_release_faultin_page(kvm, page, ret && ret != -EEXIST, writable); spin_unlock(&kvm->mmu_lock); - kvm_set_pfn_accessed(hfn); - kvm_release_pfn_clean(hfn); return ret; } @@ -752,7 +731,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu) hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; - csr_write(CSR_HGATP, hgatp); + ncsr_write(CSR_HGATP, hgatp); if (!kvm_riscv_gstage_vmid_bits()) kvm_riscv_local_hfence_gvma_all(); diff --git a/arch/riscv/kvm/nacl.c b/arch/riscv/kvm/nacl.c new file mode 100644 index 000000000000..08a95ad9ada2 --- /dev/null +++ b/arch/riscv/kvm/nacl.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. + */ + +#include <linux/kvm_host.h> +#include <linux/vmalloc.h> +#include <asm/kvm_nacl.h> + +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available); +DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available); +DEFINE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl); + +void __kvm_riscv_nacl_hfence(void *shmem, + unsigned long control, + unsigned long page_num, + unsigned long page_count) +{ + int i, ent = -1, try_count = 5; + unsigned long *entp; + +again: + for (i = 0; i < SBI_NACL_SHMEM_HFENCE_ENTRY_MAX; i++) { + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i); + if (lelong_to_cpu(*entp) & SBI_NACL_SHMEM_HFENCE_CONFIG_PEND) + continue; + + ent = i; + break; + } + + if (ent < 0) { + if (try_count) { + nacl_sync_hfence(-1UL); + goto again; + } else { + pr_warn("KVM: No free entry in NACL shared memory\n"); + return; + } + } + + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i); + *entp = cpu_to_lelong(control); + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(i); + *entp = cpu_to_lelong(page_num); + entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(i); + *entp = cpu_to_lelong(page_count); +} + +int kvm_riscv_nacl_enable(void) +{ + int rc; + struct sbiret ret; + struct kvm_riscv_nacl *nacl; + + if (!kvm_riscv_nacl_available()) + return 0; + nacl = this_cpu_ptr(&kvm_riscv_nacl); + + ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM, + nacl->shmem_phys, 0, 0, 0, 0, 0); + rc = sbi_err_map_linux_errno(ret.error); + if (rc) + return rc; + + return 0; +} + +void kvm_riscv_nacl_disable(void) +{ + if (!kvm_riscv_nacl_available()) + return; + + sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM, + SBI_SHMEM_DISABLE, SBI_SHMEM_DISABLE, 0, 0, 0, 0); +} + +void kvm_riscv_nacl_exit(void) +{ + int cpu; + struct kvm_riscv_nacl *nacl; + + if (!kvm_riscv_nacl_available()) + return; + + /* Allocate per-CPU shared memory */ + for_each_possible_cpu(cpu) { + nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu); + if (!nacl->shmem) + continue; + + free_pages((unsigned long)nacl->shmem, + get_order(SBI_NACL_SHMEM_SIZE)); + nacl->shmem = NULL; + nacl->shmem_phys = 0; + } +} + +static long nacl_probe_feature(long feature_id) +{ + struct sbiret ret; + + if (!kvm_riscv_nacl_available()) + return 0; + + ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_PROBE_FEATURE, + feature_id, 0, 0, 0, 0, 0); + return ret.value; +} + +int kvm_riscv_nacl_init(void) +{ + int cpu; + struct page *shmem_page; + struct kvm_riscv_nacl *nacl; + + if (sbi_spec_version < sbi_mk_version(1, 0) || + sbi_probe_extension(SBI_EXT_NACL) <= 0) + return -ENODEV; + + /* Enable NACL support */ + static_branch_enable(&kvm_riscv_nacl_available); + + /* Probe NACL features */ + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_CSR)) + static_branch_enable(&kvm_riscv_nacl_sync_csr_available); + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_HFENCE)) + static_branch_enable(&kvm_riscv_nacl_sync_hfence_available); + if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_SRET)) + static_branch_enable(&kvm_riscv_nacl_sync_sret_available); + if (nacl_probe_feature(SBI_NACL_FEAT_AUTOSWAP_CSR)) + static_branch_enable(&kvm_riscv_nacl_autoswap_csr_available); + + /* Allocate per-CPU shared memory */ + for_each_possible_cpu(cpu) { + nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu); + + shmem_page = alloc_pages(GFP_KERNEL | __GFP_ZERO, + get_order(SBI_NACL_SHMEM_SIZE)); + if (!shmem_page) { + kvm_riscv_nacl_exit(); + return -ENOMEM; + } + nacl->shmem = page_to_virt(shmem_page); + nacl->shmem_phys = page_to_phys(shmem_page); + } + + return 0; +} diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 23c0e82b5103..2f91ea5f8493 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -14,6 +14,7 @@ #include <asm/csr.h> #include <asm/cpufeature.h> #include <asm/insn-def.h> +#include <asm/kvm_nacl.h> #define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL) @@ -186,18 +187,24 @@ void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu) void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu) { - struct kvm_vmid *vmid; + struct kvm_vmid *v = &vcpu->kvm->arch.vmid; + unsigned long vmid = READ_ONCE(v->vmid); - vmid = &vcpu->kvm->arch.vmid; - kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid)); + if (kvm_riscv_nacl_available()) + nacl_hfence_gvma_vmid_all(nacl_shmem(), vmid); + else + kvm_riscv_local_hfence_gvma_vmid_all(vmid); } void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu) { - struct kvm_vmid *vmid; + struct kvm_vmid *v = &vcpu->kvm->arch.vmid; + unsigned long vmid = READ_ONCE(v->vmid); - vmid = &vcpu->kvm->arch.vmid; - kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid)); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_all(nacl_shmem(), vmid); + else + kvm_riscv_local_hfence_vvma_all(vmid); } static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu, @@ -251,6 +258,7 @@ static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu, void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) { + unsigned long vmid; struct kvm_riscv_hfence d = { 0 }; struct kvm_vmid *v = &vcpu->kvm->arch.vmid; @@ -259,26 +267,41 @@ void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) case KVM_RISCV_HFENCE_UNKNOWN: break; case KVM_RISCV_HFENCE_GVMA_VMID_GPA: - kvm_riscv_local_hfence_gvma_vmid_gpa( - READ_ONCE(v->vmid), - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_gvma_vmid(nacl_shmem(), vmid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_gvma_vmid_gpa(vmid, d.addr, + d.size, d.order); break; case KVM_RISCV_HFENCE_VVMA_ASID_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - kvm_riscv_local_hfence_vvma_asid_gva( - READ_ONCE(v->vmid), d.asid, - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_asid(nacl_shmem(), vmid, d.asid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_vvma_asid_gva(vmid, d.asid, d.addr, + d.size, d.order); break; case KVM_RISCV_HFENCE_VVMA_ASID_ALL: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); - kvm_riscv_local_hfence_vvma_asid_all( - READ_ONCE(v->vmid), d.asid); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma_asid_all(nacl_shmem(), vmid, d.asid); + else + kvm_riscv_local_hfence_vvma_asid_all(vmid, d.asid); break; case KVM_RISCV_HFENCE_VVMA_GVA: kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD); - kvm_riscv_local_hfence_vvma_gva( - READ_ONCE(v->vmid), - d.addr, d.size, d.order); + vmid = READ_ONCE(v->vmid); + if (kvm_riscv_nacl_available()) + nacl_hfence_vvma(nacl_shmem(), vmid, + d.addr, d.size, d.order); + else + kvm_riscv_local_hfence_vvma_gva(vmid, d.addr, + d.size, d.order); break; default: break; diff --git a/arch/riscv/kvm/trace.h b/arch/riscv/kvm/trace.h new file mode 100644 index 000000000000..3d54175d805c --- /dev/null +++ b/arch/riscv/kvm/trace.h @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tracepoints for RISC-V KVM + * + * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd. + * + */ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(kvm_entry, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + + TP_STRUCT__entry( + __field(unsigned long, pc) + ), + + TP_fast_assign( + __entry->pc = vcpu->arch.guest_context.sepc; + ), + + TP_printk("PC: 0x016%lx", __entry->pc) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(struct kvm_cpu_trap *trap), + TP_ARGS(trap), + + TP_STRUCT__entry( + __field(unsigned long, sepc) + __field(unsigned long, scause) + __field(unsigned long, stval) + __field(unsigned long, htval) + __field(unsigned long, htinst) + ), + + TP_fast_assign( + __entry->sepc = trap->sepc; + __entry->scause = trap->scause; + __entry->stval = trap->stval; + __entry->htval = trap->htval; + __entry->htinst = trap->htinst; + ), + + TP_printk("SEPC:0x%lx, SCAUSE:0x%lx, STVAL:0x%lx, HTVAL:0x%lx, HTINST:0x%lx", + __entry->sepc, + __entry->scause, + __entry->stval, + __entry->htval, + __entry->htinst) +); + +#endif /* _TRACE_RSICV_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index b5ca9f2e98ac..e0a01af426ff 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -17,20 +17,29 @@ #include <linux/sched/signal.h> #include <linux/fs.h> #include <linux/kvm_host.h> -#include <asm/csr.h> #include <asm/cacheflush.h> +#include <asm/kvm_nacl.h> #include <asm/kvm_vcpu_vector.h> +#define CREATE_TRACE_POINTS +#include "trace.h" + const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { KVM_GENERIC_VCPU_STATS(), STATS_DESC_COUNTER(VCPU, ecall_exit_stat), STATS_DESC_COUNTER(VCPU, wfi_exit_stat), + STATS_DESC_COUNTER(VCPU, wrs_exit_stat), STATS_DESC_COUNTER(VCPU, mmio_exit_user), STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), STATS_DESC_COUNTER(VCPU, csr_exit_user), STATS_DESC_COUNTER(VCPU, csr_exit_kernel), STATS_DESC_COUNTER(VCPU, signal_exits), - STATS_DESC_COUNTER(VCPU, exits) + STATS_DESC_COUNTER(VCPU, exits), + STATS_DESC_COUNTER(VCPU, instr_illegal_exits), + STATS_DESC_COUNTER(VCPU, load_misaligned_exits), + STATS_DESC_COUNTER(VCPU, store_misaligned_exits), + STATS_DESC_COUNTER(VCPU, load_access_exits), + STATS_DESC_COUNTER(VCPU, store_access_exits), }; const struct kvm_stats_header kvm_vcpu_stats_header = { @@ -42,12 +51,33 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { sizeof(kvm_vcpu_stats_desc), }; -static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) +static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu, + bool kvm_sbi_reset) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; - struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context; + void *vector_datap = cntx->vector.datap; + + memset(cntx, 0, sizeof(*cntx)); + memset(csr, 0, sizeof(*csr)); + memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr)); + + /* Restore datap as it's not a part of the guest context. */ + cntx->vector.datap = vector_datap; + + if (kvm_sbi_reset) + kvm_riscv_vcpu_sbi_load_reset_state(vcpu); + + /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ + cntx->sstatus = SR_SPP | SR_SPIE; + + cntx->hstatus |= HSTATUS_VTW; + cntx->hstatus |= HSTATUS_SPVP; + cntx->hstatus |= HSTATUS_SPV; +} + +static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset) +{ bool loaded; /** @@ -62,9 +92,7 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.last_exit_cpu = -1; - memcpy(csr, reset_csr, sizeof(*csr)); - - memcpy(cntx, reset_cntx, sizeof(*cntx)); + kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset); kvm_riscv_vcpu_fp_reset(vcpu); @@ -99,8 +127,8 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { int rc; - struct kvm_cpu_context *cntx; - struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; + + spin_lock_init(&vcpu->arch.mp_state_lock); /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; @@ -118,20 +146,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) /* Setup VCPU hfence queue */ spin_lock_init(&vcpu->arch.hfence_lock); - /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ - cntx = &vcpu->arch.guest_reset_context; - cntx->sstatus = SR_SPP | SR_SPIE; - cntx->hstatus = 0; - cntx->hstatus |= HSTATUS_VTW; - cntx->hstatus |= HSTATUS_SPVP; - cntx->hstatus |= HSTATUS_SPV; + spin_lock_init(&vcpu->arch.reset_state.lock); - if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx)) + if (kvm_riscv_vcpu_alloc_vector_context(vcpu)) return -ENOMEM; - /* By default, make CY, TM, and IR counters accessible in VU mode */ - reset_csr->scounteren = 0x7; - /* Setup VCPU timer */ kvm_riscv_vcpu_timer_init(vcpu); @@ -150,7 +169,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_sbi_init(vcpu); /* Reset VCPU */ - kvm_riscv_reset_vcpu(vcpu); + kvm_riscv_reset_vcpu(vcpu, false); return 0; } @@ -201,7 +220,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && - !vcpu->arch.power_off && !vcpu->arch.pause); + !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -214,6 +233,13 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false; } +#ifdef CONFIG_GUEST_PERF_EVENTS +unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.guest_context.sepc; +} +#endif + vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) { return VM_FAULT_SIGBUS; @@ -349,10 +375,10 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; /* Read current HVIP and VSIE CSRs */ - csr->vsie = csr_read(CSR_VSIE); + csr->vsie = ncsr_read(CSR_VSIE); /* Sync-up HVIP.VSSIP bit changes does by Guest */ - hvip = csr_read(CSR_HVIP); + hvip = ncsr_read(CSR_HVIP); if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { if (hvip & (1UL << IRQ_VS_SOFT)) { if (!test_and_set_bit(IRQ_VS_SOFT, @@ -365,6 +391,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) } } + /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */ + if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) { + if (!(hvip & (1UL << IRQ_PMU_OVF)) && + !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask)) + clear_bit(IRQ_PMU_OVF, v->irqs_pending); + } + /* Sync-up AIA high interrupts */ kvm_riscv_vcpu_aia_sync_interrupts(vcpu); @@ -382,7 +415,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; set_bit(irq, vcpu->arch.irqs_pending); @@ -397,14 +431,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { /* - * We only allow VS-mode software, timer, and external + * We only allow VS-mode software, timer, counter overflow and external * interrupts when irq is one of the local interrupts * defined by RISC-V privilege specification. */ if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; clear_bit(irq, vcpu->arch.irqs_pending); @@ -429,26 +464,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); } -void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } -void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = false; + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); } +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_on(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -458,25 +509,42 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_riscv_vcpu_power_off(vcpu); + __kvm_riscv_vcpu_power_off(vcpu); + break; + case KVM_MP_STATE_INIT_RECEIVED: + if (vcpu->kvm->arch.mp_state_reset) + kvm_riscv_reset_vcpu(vcpu, false); + else + ret = -EINVAL; break; default: ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - /* TODO; To be implemented later. */ - return -EINVAL; + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); + } else { + vcpu->guest_debug = 0; + vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); + } + + return 0; } static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) @@ -496,6 +564,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, ZICBOZ)) cfg->henvcfg |= ENVCFG_CBZE; + if (riscv_isa_extension_available(isa, SVADU) && + !riscv_isa_extension_available(isa, SVADE)) + cfg->henvcfg |= ENVCFG_ADUE; + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; if (riscv_isa_extension_available(isa, SSAIA)) @@ -505,29 +577,57 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, SMSTATEEN)) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } + + cfg->hedeleg = KVM_HEDELEG_DEFAULT; + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + void *nsh; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; - csr_write(CSR_VSSTATUS, csr->vsstatus); - csr_write(CSR_VSIE, csr->vsie); - csr_write(CSR_VSTVEC, csr->vstvec); - csr_write(CSR_VSSCRATCH, csr->vsscratch); - csr_write(CSR_VSEPC, csr->vsepc); - csr_write(CSR_VSCAUSE, csr->vscause); - csr_write(CSR_VSTVAL, csr->vstval); - csr_write(CSR_HVIP, csr->hvip); - csr_write(CSR_VSATP, csr->vsatp); - csr_write(CSR_HENVCFG, cfg->henvcfg); - if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { - csr_write(CSR_HSTATEEN0, cfg->hstateen0); + if (kvm_riscv_nacl_sync_csr_available()) { + nsh = nacl_shmem(); + nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus); + nacl_csr_write(nsh, CSR_VSIE, csr->vsie); + nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec); + nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch); + nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc); + nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause); + nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval); + nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg); + nacl_csr_write(nsh, CSR_HVIP, csr->hvip); + nacl_csr_write(nsh, CSR_VSATP, csr->vsatp); + nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg); + if (IS_ENABLED(CONFIG_32BIT)) + nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } + } else { + csr_write(CSR_VSSTATUS, csr->vsstatus); + csr_write(CSR_VSIE, csr->vsie); + csr_write(CSR_VSTVEC, csr->vstvec); + csr_write(CSR_VSSCRATCH, csr->vsscratch); + csr_write(CSR_VSEPC, csr->vsepc); + csr_write(CSR_VSCAUSE, csr->vscause); + csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); + csr_write(CSR_HVIP, csr->hvip); + csr_write(CSR_VSATP, csr->vsatp); + csr_write(CSR_HENVCFG, cfg->henvcfg); if (IS_ENABLED(CONFIG_32BIT)) - csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { + csr_write(CSR_HSTATEEN0, cfg->hstateen0); + if (IS_ENABLED(CONFIG_32BIT)) + csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); + } } kvm_riscv_gstage_update_hgatp(vcpu); @@ -550,6 +650,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + void *nsh; struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; vcpu->cpu = -1; @@ -565,15 +666,28 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.isa); kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context); - csr->vsstatus = csr_read(CSR_VSSTATUS); - csr->vsie = csr_read(CSR_VSIE); - csr->vstvec = csr_read(CSR_VSTVEC); - csr->vsscratch = csr_read(CSR_VSSCRATCH); - csr->vsepc = csr_read(CSR_VSEPC); - csr->vscause = csr_read(CSR_VSCAUSE); - csr->vstval = csr_read(CSR_VSTVAL); - csr->hvip = csr_read(CSR_HVIP); - csr->vsatp = csr_read(CSR_VSATP); + if (kvm_riscv_nacl_available()) { + nsh = nacl_shmem(); + csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS); + csr->vsie = nacl_csr_read(nsh, CSR_VSIE); + csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC); + csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH); + csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC); + csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE); + csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL); + csr->hvip = nacl_csr_read(nsh, CSR_HVIP); + csr->vsatp = nacl_csr_read(nsh, CSR_VSATP); + } else { + csr->vsstatus = csr_read(CSR_VSSTATUS); + csr->vsie = csr_read(CSR_VSIE); + csr->vstvec = csr_read(CSR_VSTVEC); + csr->vsscratch = csr_read(CSR_VSSCRATCH); + csr->vsepc = csr_read(CSR_VSEPC); + csr->vscause = csr_read(CSR_VSCAUSE); + csr->vstval = csr_read(CSR_VSTVAL); + csr->hvip = csr_read(CSR_HVIP); + csr->vsatp = csr_read(CSR_VSATP); + } } static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) @@ -584,11 +698,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) && (!vcpu->arch.pause), + (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); kvm_vcpu_srcu_read_lock(vcpu); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* * Awaken to handle a signal, request to * sleep again later. @@ -598,7 +712,7 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) - kvm_riscv_reset_vcpu(vcpu); + kvm_riscv_reset_vcpu(vcpu, true); if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu)) kvm_riscv_gstage_update_hgatp(vcpu); @@ -628,7 +742,7 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; - csr_write(CSR_HVIP, csr->hvip); + ncsr_write(CSR_HVIP, csr->hvip); kvm_riscv_vcpu_aia_update_hvip(vcpu); } @@ -638,6 +752,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu * struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren); vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg); if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) @@ -651,6 +766,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; + csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren); csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg); if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) @@ -665,11 +781,81 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v * This must be noinstr as instrumentation may make use of RCU, and this is not * safe during the EQS. */ -static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) +static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu, + struct kvm_cpu_trap *trap) { + void *nsh; + struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context; + struct kvm_cpu_context *hcntx = &vcpu->arch.host_context; + + /* + * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and + * HTINST) here because we do local_irq_enable() after this + * function in kvm_arch_vcpu_ioctl_run() which can result in + * an interrupt immediately after local_irq_enable() and can + * potentially change trap CSRs. + */ + kvm_riscv_vcpu_swap_in_guest_state(vcpu); guest_state_enter_irqoff(); - __kvm_riscv_switch_to(&vcpu->arch); + + if (kvm_riscv_nacl_sync_sret_available()) { + nsh = nacl_shmem(); + + if (kvm_riscv_nacl_autoswap_csr_available()) { + hcntx->hstatus = + nacl_csr_read(nsh, CSR_HSTATUS); + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET + + SBI_NACL_SHMEM_AUTOSWAP_HSTATUS, + gcntx->hstatus); + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET, + SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS); + } else if (kvm_riscv_nacl_sync_csr_available()) { + hcntx->hstatus = nacl_csr_swap(nsh, + CSR_HSTATUS, gcntx->hstatus); + } else { + hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); + } + + nacl_scratch_write_longs(nsh, + SBI_NACL_SHMEM_SRET_OFFSET + + SBI_NACL_SHMEM_SRET_X(1), + &gcntx->ra, + SBI_NACL_SHMEM_SRET_X_LAST); + + __kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL, + SBI_EXT_NACL_SYNC_SRET); + + if (kvm_riscv_nacl_autoswap_csr_available()) { + nacl_scratch_write_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET, + 0); + gcntx->hstatus = nacl_scratch_read_long(nsh, + SBI_NACL_SHMEM_AUTOSWAP_OFFSET + + SBI_NACL_SHMEM_AUTOSWAP_HSTATUS); + } else { + gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + } + + trap->htval = nacl_csr_read(nsh, CSR_HTVAL); + trap->htinst = nacl_csr_read(nsh, CSR_HTINST); + } else { + hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus); + + __kvm_riscv_switch_to(&vcpu->arch); + + gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus); + + trap->htval = csr_read(CSR_HTVAL); + trap->htinst = csr_read(CSR_HTINST); + } + + trap->sepc = gcntx->sepc; + trap->scause = csr_read(CSR_SCAUSE); + trap->stval = csr_read(CSR_STVAL); + vcpu->arch.last_exit_cpu = vcpu->cpu; guest_state_exit_irqoff(); kvm_riscv_vcpu_swap_in_host_state(vcpu); @@ -711,7 +897,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) return ret; } - if (run->immediate_exit) { + if (!vcpu->wants_to_run) { kvm_vcpu_srcu_read_unlock(vcpu); return -EINTR; } @@ -782,24 +968,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ kvm_riscv_local_tlb_sanitize(vcpu); + trace_kvm_entry(vcpu); + guest_timing_enter_irqoff(); - kvm_riscv_vcpu_enter_exit(vcpu); + kvm_riscv_vcpu_enter_exit(vcpu, &trap); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; - /* - * Save SCAUSE, STVAL, HTVAL, and HTINST because we might - * get an interrupt between __kvm_riscv_switch_to() and - * local_irq_enable() which can potentially change CSRs. - */ - trap.sepc = vcpu->arch.guest_context.sepc; - trap.scause = csr_read(CSR_SCAUSE); - trap.stval = csr_read(CSR_STVAL); - trap.htval = csr_read(CSR_HTVAL); - trap.htinst = csr_read(CSR_HTINST); - /* Syncup interrupts state with HW */ kvm_riscv_vcpu_sync_interrupts(vcpu); @@ -820,6 +997,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) local_irq_enable(); + trace_kvm_exit(&trap); + preempt_enable(); kvm_vcpu_srcu_read_lock(vcpu); diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index 2415722c01b8..6e0c18412795 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -165,6 +165,17 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu, vcpu->arch.guest_context.sstatus |= SR_SPP; } +static inline int vcpu_redirect(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap) +{ + int ret = -EFAULT; + + if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) { + kvm_riscv_vcpu_trap_redirect(vcpu, trap); + ret = 1; + } + return ret; +} + /* * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. @@ -183,12 +194,32 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, run->exit_reason = KVM_EXIT_UNKNOWN; switch (trap->scause) { case EXC_INST_ILLEGAL: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ILLEGAL_INSN); + vcpu->stat.instr_illegal_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_LOAD_MISALIGNED: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_LOAD); + vcpu->stat.load_misaligned_exits++; + ret = vcpu_redirect(vcpu, trap); + break; case EXC_STORE_MISALIGNED: - if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) { - kvm_riscv_vcpu_trap_redirect(vcpu, trap); - ret = 1; - } + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_STORE); + vcpu->stat.store_misaligned_exits++; + ret = vcpu_redirect(vcpu, trap); + break; + case EXC_LOAD_ACCESS: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_LOAD); + vcpu->stat.load_access_exits++; + ret = vcpu_redirect(vcpu, trap); + break; + case EXC_STORE_ACCESS: + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_STORE); + vcpu->stat.store_access_exits++; + ret = vcpu_redirect(vcpu, trap); + break; + case EXC_INST_ACCESS: + ret = vcpu_redirect(vcpu, trap); break; case EXC_VIRTUAL_INST_FAULT: if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) @@ -204,6 +235,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run); break; + case EXC_BREAKPOINT: + run->exit_reason = KVM_EXIT_DEBUG; + ret = 0; + break; default: break; } diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c index ee7215f4071f..97dec18e6989 100644 --- a/arch/riscv/kvm/vcpu_insn.c +++ b/arch/riscv/kvm/vcpu_insn.c @@ -16,6 +16,9 @@ #define INSN_MASK_WFI 0xffffffff #define INSN_MATCH_WFI 0x10500073 +#define INSN_MASK_WRS 0xffffffff +#define INSN_MATCH_WRS 0x00d00073 + #define INSN_MATCH_CSRRW 0x1073 #define INSN_MASK_CSRRW 0x707f #define INSN_MATCH_CSRRS 0x2073 @@ -203,6 +206,13 @@ static int wfi_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn) return KVM_INSN_CONTINUE_NEXT_SEPC; } +static int wrs_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn) +{ + vcpu->stat.wrs_exit_stat++; + kvm_vcpu_on_spin(vcpu, vcpu->arch.guest_context.sstatus & SR_SPP); + return KVM_INSN_CONTINUE_NEXT_SEPC; +} + struct csr_func { unsigned int base; unsigned int count; @@ -378,6 +388,11 @@ static const struct insn_func system_opcode_funcs[] = { .match = INSN_MATCH_WFI, .func = wfi_insn, }, + { + .mask = INSN_MASK_WRS, + .match = INSN_MATCH_WRS, + .func = wrs_insn, + }, }; static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f4a6124d25c9..2e1b646f0d61 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -15,6 +15,7 @@ #include <asm/cacheflush.h> #include <asm/cpufeature.h> #include <asm/kvm_vcpu_vector.h> +#include <asm/pgtable.h> #include <asm/vector.h> #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) @@ -34,13 +35,23 @@ static const unsigned long kvm_isa_ext_arr[] = { [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, /* Multi letter extensions (alphabetically sorted) */ + [KVM_RISCV_ISA_EXT_SMNPM] = RISCV_ISA_EXT_SSNPM, KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), + KVM_ISA_EXT_ARR(SSNPM), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(SVVPTC), + KVM_ISA_EXT_ARR(ZAAMO), + KVM_ISA_EXT_ARR(ZABHA), KVM_ISA_EXT_ARR(ZACAS), + KVM_ISA_EXT_ARR(ZALRSC), + KVM_ISA_EXT_ARR(ZAWRS), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), KVM_ISA_EXT_ARR(ZBC), @@ -48,11 +59,17 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZBKC), KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZCA), + KVM_ISA_EXT_ARR(ZCB), + KVM_ISA_EXT_ARR(ZCD), + KVM_ISA_EXT_ARR(ZCF), + KVM_ISA_EXT_ARR(ZCMOP), KVM_ISA_EXT_ARR(ZFA), KVM_ISA_EXT_ARR(ZFH), KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), + KVM_ISA_EXT_ARR(ZICCRSE), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), @@ -60,6 +77,7 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZIMOP), KVM_ISA_EXT_ARR(ZKND), KVM_ISA_EXT_ARR(ZKNE), KVM_ISA_EXT_ARR(ZKNH), @@ -99,6 +117,15 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) switch (ext) { case KVM_RISCV_ISA_EXT_H: return false; + case KVM_RISCV_ISA_EXT_SSCOFPMF: + /* Sscofpmf depends on interrupt filtering defined in ssaia */ + return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); + case KVM_RISCV_ISA_EXT_SVADU: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Guest OS can use Svadu only when host OS enable Svadu. + */ + return arch_has_hw_pte_young(); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -116,10 +143,19 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + case KVM_RISCV_ISA_EXT_SMNPM: + /* There is not architectural config bit to disable sscofpmf completely */ + case KVM_RISCV_ISA_EXT_SSCOFPMF: + case KVM_RISCV_ISA_EXT_SSNPM: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_RISCV_ISA_EXT_SVVPTC: + case KVM_RISCV_ISA_EXT_ZAAMO: + case KVM_RISCV_ISA_EXT_ZABHA: case KVM_RISCV_ISA_EXT_ZACAS: + case KVM_RISCV_ISA_EXT_ZALRSC: + case KVM_RISCV_ISA_EXT_ZAWRS: case KVM_RISCV_ISA_EXT_ZBA: case KVM_RISCV_ISA_EXT_ZBB: case KVM_RISCV_ISA_EXT_ZBC: @@ -127,9 +163,15 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZBKC: case KVM_RISCV_ISA_EXT_ZBKX: case KVM_RISCV_ISA_EXT_ZBS: + case KVM_RISCV_ISA_EXT_ZCA: + case KVM_RISCV_ISA_EXT_ZCB: + case KVM_RISCV_ISA_EXT_ZCD: + case KVM_RISCV_ISA_EXT_ZCF: + case KVM_RISCV_ISA_EXT_ZCMOP: case KVM_RISCV_ISA_EXT_ZFA: case KVM_RISCV_ISA_EXT_ZFH: case KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_RISCV_ISA_EXT_ZICCRSE: case KVM_RISCV_ISA_EXT_ZICNTR: case KVM_RISCV_ISA_EXT_ZICOND: case KVM_RISCV_ISA_EXT_ZICSR: @@ -137,6 +179,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_ZIHINTNTL: case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: case KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_RISCV_ISA_EXT_ZIMOP: case KVM_RISCV_ISA_EXT_ZKND: case KVM_RISCV_ISA_EXT_ZKNE: case KVM_RISCV_ISA_EXT_ZKNH: @@ -161,6 +204,12 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) /* Extensions which can be disabled using Smstateen */ case KVM_RISCV_ISA_EXT_SSAIA: return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); + case KVM_RISCV_ISA_EXT_SVADE: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Svade can't be disabled unless we support Svadu. + */ + return arch_has_hw_pte_young(); default: break; } @@ -718,9 +767,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu, switch (reg_subtype) { case KVM_REG_RISCV_ISA_SINGLE: return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val); - case KVM_REG_RISCV_SBI_MULTI_EN: + case KVM_REG_RISCV_ISA_MULTI_EN: return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true); - case KVM_REG_RISCV_SBI_MULTI_DIS: + case KVM_REG_RISCV_ISA_MULTI_DIS: return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false); default: return -ENOENT; @@ -986,7 +1035,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) { - return copy_isa_ext_reg_indices(vcpu, NULL);; + return copy_isa_ext_reg_indices(vcpu, NULL); } static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 86391a5061dd..78ac3216a54d 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -14,6 +14,7 @@ #include <asm/csr.h> #include <asm/kvm_vcpu_sbi.h> #include <asm/kvm_vcpu_pmu.h> +#include <asm/sbi.h> #include <linux/bitops.h> #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) @@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc) u64 sample_period; if (!pmc->counter_val) - sample_period = counter_val_mask + 1; + sample_period = counter_val_mask; else sample_period = (-pmc->counter_val) & counter_val_mask; @@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); } +static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + unsigned long *out_val) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc; + int fevent_code; + + if (!IS_ENABLED(CONFIG_32BIT)) { + pr_warn("%s: should be invoked for only RV32\n", __func__); + return -EINVAL; + } + + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld]during read\n", cidx); + return -EINVAL; + } + + pmc = &kvpmu->pmc[cidx]; + + if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) + return -EINVAL; + + fevent_code = get_event_code(pmc->event_idx); + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + + *out_val = pmc->counter_val >> 32; + + return 0; +} + static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, unsigned long *out_val) { @@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, u64 enabled, running; int fevent_code; + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld] during read\n", cidx); + return -EINVAL; + } + pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { @@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct return 0; } -static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, - unsigned long flags, unsigned long eidx, unsigned long evtdata) +static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct kvm_vcpu *vcpu = pmc->vcpu; + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); + u64 period; + + /* + * Stop the event counting by directly accessing the perf_event. + * Otherwise, this needs to deferred via a workqueue. + * That will introduce skew in the counter value because the actual + * physical counter would start after returning from this function. + * It will be stopped again once the workqueue is scheduled + */ + rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * The hw counter would start automatically when this function returns. + * Thus, the host may continue to interrupt and inject it to the guest + * even without the guest configuring the next event. Depending on the hardware + * the host may have some sluggishness only if privilege mode filtering is not + * available. In an ideal world, where qemu is not the only capable hardware, + * this can be removed. + * FYI: ARM64 does this way while x86 doesn't do anything as such. + * TODO: Should we keep it for RISC-V ? + */ + period = -(local64_read(&perf_event->count)); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + set_bit(pmc->idx, kvpmu->pmc_overflown); + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); + + rpmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + +static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, + unsigned long flags, unsigned long eidx, + unsigned long evtdata) { struct perf_event *event; @@ -247,9 +325,9 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr */ attr->sample_period = kvm_pmu_get_sample_period(pmc); - event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc); + event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { - pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); + pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); } @@ -310,6 +388,70 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, return ret; } +static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + + kfree(kvpmu->sdata); + kvpmu->sdata = NULL; + kvpmu->snapshot_addr = INVALID_GPA; +} + +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + int sbiret = 0; + gpa_t saddr; + unsigned long hva; + bool writable; + + if (!kvpmu || flags) { + sbiret = SBI_ERR_INVALID_PARAM; + goto out; + } + + if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) { + kvm_pmu_clear_snapshot_area(vcpu); + return 0; + } + + saddr = saddr_low; + + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) + saddr |= ((gpa_t)saddr_high << 32); + else + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); + if (kvm_is_error_hva(hva) || !writable) { + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); + if (!kvpmu->sdata) + return -ENOMEM; + + if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { + kfree(kvpmu->sdata); + sbiret = SBI_ERR_FAILURE; + goto out; + } + + kvpmu->snapshot_addr = saddr; + +out: + retdata->err_val = sbiret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { @@ -343,20 +485,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, int i, pmc_index, sbiret = 0; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set) { + if (kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data))) { + pr_warn("Unable to read snapshot shared memory while starting counters\n"); + sbiret = SBI_ERR_FAILURE; + goto out; + } + } /* Start the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; + /* The guest started the counter again. Reset the overflow status */ + clear_bit(pmc_index, kvpmu->pmc_overflown); pmc = &kvpmu->pmc[pmc_index]; - if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) + if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { pmc->counter_val = ival; + } else if (snap_flag_set) { + /* The counter index in the snapshot are relative to the counter base */ + pmc->counter_val = kvpmu->sdata->ctr_values[i]; + } + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { fevent_code = get_event_code(pmc->event_idx); if (fevent_code >= SBI_PMU_FW_MAX) { @@ -400,12 +562,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, u64 enabled, running; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; + bool shmem_needs_update = false; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + /* Stop the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; @@ -432,21 +601,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, sbiret = SBI_ERR_ALREADY_STOPPED; } - if (flags & SBI_PMU_STOP_FLAG_RESET) { - /* Relase the counter if this is a reset request */ - pmc->counter_val += perf_event_read_value(pmc->perf_event, - &enabled, &running); + if (flags & SBI_PMU_STOP_FLAG_RESET) + /* Release the counter if this is a reset request */ kvm_pmu_release_perf_event(pmc); - } } else { sbiret = SBI_ERR_INVALID_PARAM; } + + if (snap_flag_set && !sbiret) { + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + else if (pmc->perf_event) + pmc->counter_val += perf_event_read_value(pmc->perf_event, + &enabled, &running); + /* + * The counter and overflow indicies in the snapshot region are w.r.to + * cbase. Modify the set bit in the counter mask instead of the pmc_index + * which indicates the absolute counter index. + */ + if (test_bit(pmc_index, kvpmu->pmc_overflown)) + kvpmu->sdata->ctr_overflow_mask |= BIT(i); + kvpmu->sdata->ctr_values[i] = pmc->counter_val; + shmem_needs_update = true; + } + if (flags & SBI_PMU_STOP_FLAG_RESET) { pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; clear_bit(pmc_index, kvpmu->pmc_in_use); + clear_bit(pmc_index, kvpmu->pmc_overflown); + if (snap_flag_set) { + /* + * Only clear the given counter as the caller is responsible to + * validate both the overflow mask and configured counters. + */ + kvpmu->sdata->ctr_overflow_mask &= ~BIT(i); + shmem_needs_update = true; + } } } + if (shmem_needs_update) + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data)); + out: retdata->err_val = sbiret; @@ -458,7 +655,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata) { - int ctr_idx, ret, sbiret = 0; + int ctr_idx, sbiret = 0; + long ret; bool is_fevent; unsigned long event_code; u32 etype = kvm_pmu_get_perf_event_type(eidx); @@ -468,6 +666,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba .type = etype, .size = sizeof(struct perf_event_attr), .pinned = true, + .disabled = true, /* * It should never reach here if the platform doesn't support the sscofpmf * extension as mode filtering won't work without it. @@ -517,8 +716,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba kvpmu->fw_event[event_code].started = true; } else { ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata); - if (ret) - return ret; + if (ret) { + sbiret = SBI_ERR_NOT_SUPPORTED; + goto out; + } } set_bit(ctr_idx, kvpmu->pmc_in_use); @@ -530,7 +731,19 @@ out: return 0; } -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + + ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val); + if (ret == -EINVAL) + retdata->err_val = SBI_ERR_INVALID_PARAM; + + return 0; +} + +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata) { int ret; @@ -566,6 +779,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) kvpmu->num_hw_ctrs = num_hw_ctrs + 1; kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvpmu->snapshot_addr = INVALID_GPA; if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); @@ -585,6 +799,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc = &kvpmu->pmc[i]; pmc->idx = i; pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; + pmc->vcpu = vcpu; if (i < kvpmu->num_hw_ctrs) { pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; if (i < 3) @@ -601,7 +816,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc->cinfo.csr = CSR_CYCLE + i; } else { pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; - pmc->cinfo.width = BITS_PER_LONG - 1; + pmc->cinfo.width = 63; } } @@ -617,14 +832,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) if (!kvpmu) return; - for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) { + for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { pmc = &kvpmu->pmc[i]; pmc->counter_val = 0; kvm_pmu_release_perf_event(pmc); pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; } - bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvm_pmu_clear_snapshot_area(vcpu); } void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 72a2ffb8dcd1..6e09b518a5d1 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { .ext_ptr = &vcpu_sbi_ext_dbcn, }, { + .ext_idx = KVM_RISCV_SBI_EXT_SUSP, + .ext_ptr = &vcpu_sbi_ext_susp, + }, + { .ext_idx = KVM_RISCV_SBI_EXT_STA, .ext_ptr = &vcpu_sbi_ext_sta, }, @@ -127,8 +131,8 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) run->riscv_sbi.args[3] = cp->a3; run->riscv_sbi.args[4] = cp->a4; run->riscv_sbi.args[5] = cp->a5; - run->riscv_sbi.ret[0] = cp->a0; - run->riscv_sbi.ret[1] = cp->a1; + run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED; + run->riscv_sbi.ret[1] = 0; } void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, @@ -138,8 +142,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, unsigned long i; struct kvm_vcpu *tmp; - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&tmp->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&tmp->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&run->system_event, 0, sizeof(run->system_event)); @@ -149,6 +156,34 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } +void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu, + unsigned long pc, unsigned long a1) +{ + spin_lock(&vcpu->arch.reset_state.lock); + vcpu->arch.reset_state.pc = pc; + vcpu->arch.reset_state.a1 = a1; + spin_unlock(&vcpu->arch.reset_state.lock); + + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); +} + +void kvm_riscv_vcpu_sbi_load_reset_state(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; + struct kvm_vcpu_reset_state *reset_state = &vcpu->arch.reset_state; + + cntx->a0 = vcpu->vcpu_id; + + spin_lock(&vcpu->arch.reset_state.lock); + cntx->sepc = reset_state->pc; + cntx->a1 = reset_state->a1; + spin_unlock(&vcpu->arch.reset_state.lock); + + cntx->sstatus &= ~SR_SIE; + csr->vsatp = 0; +} + int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run) { struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -483,19 +518,22 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu) struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; const struct kvm_riscv_sbi_extension_entry *entry; const struct kvm_vcpu_sbi_extension *ext; - int i; + int idx, i; for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { entry = &sbi_ext[i]; ext = entry->ext_ptr; + idx = entry->ext_idx; + + if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status)) + continue; if (ext->probe && !ext->probe(vcpu)) { - scontext->ext_status[entry->ext_idx] = - KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; + scontext->ext_status[idx] = KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; continue; } - scontext->ext_status[entry->ext_idx] = ext->default_disabled ? + scontext->ext_status[idx] = ext->default_disabled ? KVM_RISCV_SBI_EXT_STATUS_DISABLED : KVM_RISCV_SBI_EXT_STATUS_ENABLED; } diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index 7dca0e9381d9..f26207f84bab 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -9,44 +9,55 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/wordpart.h> #include <asm/sbi.h> #include <asm/kvm_vcpu_sbi.h> static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) { - struct kvm_cpu_context *reset_cntx; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; struct kvm_vcpu *target_vcpu; unsigned long target_vcpuid = cp->a0; + int ret = 0; target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) - return SBI_ERR_ALREADY_AVAILABLE; - reset_cntx = &target_vcpu->arch.guest_reset_context; - /* start address */ - reset_cntx->sepc = cp->a1; - /* target vcpu id to start */ - reset_cntx->a0 = target_vcpuid; - /* private data passed from kernel */ - reset_cntx->a1 = cp->a2; - kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu); + spin_lock(&target_vcpu->arch.mp_state_lock); + + if (!kvm_riscv_vcpu_stopped(target_vcpu)) { + ret = SBI_ERR_ALREADY_AVAILABLE; + goto out; + } - kvm_riscv_vcpu_power_on(target_vcpu); + kvm_riscv_vcpu_sbi_request_reset(target_vcpu, cp->a1, cp->a2); - return 0; + __kvm_riscv_vcpu_power_on(target_vcpu); + +out: + spin_unlock(&target_vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu) { - if (vcpu->arch.power_off) - return SBI_ERR_FAILURE; + int ret = 0; - kvm_riscv_vcpu_power_off(vcpu); + spin_lock(&vcpu->arch.mp_state_lock); - return 0; + if (kvm_riscv_vcpu_stopped(vcpu)) { + ret = SBI_ERR_FAILURE; + goto out; + } + + __kvm_riscv_vcpu_power_off(vcpu); + +out: + spin_unlock(&vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) @@ -58,12 +69,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) - return SBI_HSM_STATE_STARTED; - else if (vcpu->stat.generic.blocking) + if (kvm_riscv_vcpu_stopped(target_vcpu)) + return SBI_HSM_STATE_STOPPED; + else if (target_vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; else - return SBI_HSM_STATE_STOPPED; + return SBI_HSM_STATE_STARTED; } static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, @@ -71,14 +82,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, { int ret = 0; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; - struct kvm *kvm = vcpu->kvm; unsigned long funcid = cp->a6; switch (funcid) { case SBI_EXT_HSM_HART_START: - mutex_lock(&kvm->lock); ret = kvm_sbi_hsm_vcpu_start(vcpu); - mutex_unlock(&kvm->lock); break; case SBI_EXT_HSM_HART_STOP: ret = kvm_sbi_hsm_vcpu_stop(vcpu); @@ -91,7 +99,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, } return 0; case SBI_EXT_HSM_HART_SUSPEND: - switch (cp->a0) { + switch (lower_32_bits(cp->a0)) { case SBI_HSM_SUSPEND_RET_DEFAULT: kvm_riscv_vcpu_wfi(vcpu); break; diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index 7eca72df2cbd..e4be34e03e83 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, #endif /* * This can fail if perf core framework fails to create an event. - * Forward the error to userspace because it's an error which - * happened within the host kernel. The other option would be - * to convert to an SBI error and forward to the guest. + * No need to forward the error to userspace and exit the guest. + * The operation can continue without profiling. Forward the + * appropriate SBI error to the guest. */ ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp, retdata); @@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; case SBI_EXT_PMU_COUNTER_FW_READ: - ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata); + ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata); + break; + case SBI_EXT_PMU_COUNTER_FW_READ_HI: + if (IS_ENABLED(CONFIG_32BIT)) + ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata); + else + retdata->out_val = 0; + break; + case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: + ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 9c2ab3dfa93a..b17fad091bab 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, u64 next_cycle; if (cp->a6 != SBI_EXT_TIME_SET_TIMER) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_context *cp = &vcpu->arch.guest_context; unsigned long hmask = cp->a0; unsigned long hbase = cp->a1; + unsigned long hart_bit = 0, sentmask = 0; if (cp->a6 != SBI_EXT_IPI_SEND_IPI) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, if (hbase != -1UL) { if (tmp->vcpu_id < hbase) continue; - if (!(hmask & (1UL << (tmp->vcpu_id - hbase)))) + hart_bit = tmp->vcpu_id - hbase; + if (hart_bit >= __riscv_xlen) + goto done; + if (!(hmask & (1UL << hart_bit))) continue; } ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT); if (ret < 0) break; + sentmask |= 1UL << hart_bit; kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD); } +done: + if (hbase != -1UL && (hmask ^ sentmask)) + retdata->err_val = SBI_ERR_INVALID_PARAM; + return ret; } @@ -94,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask); else kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, @@ -102,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - if (cp->a2 == 0 && cp->a3 == 0) + if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL) kvm_riscv_hfence_vvma_asid_all(vcpu->kvm, hbase, hmask, cp->a4); else @@ -118,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: /* * Until nested virtualization is implemented, the - * SBI HFENCE calls should be treated as NOPs + * SBI HFENCE calls should return not supported + * hence fallthrough. */ - break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; } diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index d8cf9ca28c61..5f35427114c1 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) if (flags != 0) return SBI_ERR_INVALID_PARAM; - if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE && - shmem_phys_hi == SBI_STA_SHMEM_DISABLE) { + if (shmem_phys_lo == SBI_SHMEM_DISABLE && + shmem_phys_hi == SBI_SHMEM_DISABLE) { vcpu->arch.sta.shmem = INVALID_GPA; return 0; } diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c new file mode 100644 index 000000000000..359be90b0fc5 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_system.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Ventana Micro Systems Inc. + */ + +#include <linux/kvm_host.h> +#include <linux/wordpart.h> + +#include <asm/kvm_vcpu_sbi.h> +#include <asm/sbi.h> + +static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + unsigned long hva, i; + struct kvm_vcpu *tmp; + + switch (funcid) { + case SBI_EXT_SUSP_SYSTEM_SUSPEND: + if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { + retdata->err_val = SBI_ERR_INVALID_PARAM; + return 0; + } + + if (!(cp->sstatus & SR_SPP)) { + retdata->err_val = SBI_ERR_FAILURE; + return 0; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, cp->a1 >> PAGE_SHIFT, NULL); + if (kvm_is_error_hva(hva)) { + retdata->err_val = SBI_ERR_INVALID_ADDRESS; + return 0; + } + + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + if (tmp == vcpu) + continue; + if (!kvm_riscv_vcpu_stopped(tmp)) { + retdata->err_val = SBI_ERR_DENIED; + return 0; + } + } + + kvm_riscv_vcpu_sbi_request_reset(vcpu, cp->a1, cp->a2); + + /* userspace provides the suspend implementation */ + kvm_riscv_vcpu_sbi_forward(vcpu, run); + retdata->uexit = true; + break; + default: + retdata->err_val = SBI_ERR_NOT_SUPPORTED; + break; + } + + return 0; +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp = { + .extid_start = SBI_EXT_SUSP, + .extid_end = SBI_EXT_SUSP, + .default_disabled = true, + .handler = kvm_sbi_ext_susp_handler, +}; diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S index 0c26189aa01c..47686bcb21e0 100644 --- a/arch/riscv/kvm/vcpu_switch.S +++ b/arch/riscv/kvm/vcpu_switch.S @@ -11,11 +11,7 @@ #include <asm/asm-offsets.h> #include <asm/csr.h> - .text - .altmacro - .option norelax - -SYM_FUNC_START(__kvm_riscv_switch_to) +.macro SAVE_HOST_GPRS /* Save Host GPRs (except A0 and T0-T6) */ REG_S ra, (KVM_ARCH_HOST_RA)(a0) REG_S sp, (KVM_ARCH_HOST_SP)(a0) @@ -40,39 +36,33 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S s9, (KVM_ARCH_HOST_S9)(a0) REG_S s10, (KVM_ARCH_HOST_S10)(a0) REG_S s11, (KVM_ARCH_HOST_S11)(a0) +.endm +.macro SAVE_HOST_AND_RESTORE_GUEST_CSRS __resume_addr /* Load Guest CSR values */ REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0) - REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0) - REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - la t4, .Lkvm_switch_return - REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0) + la t1, \__resume_addr + REG_L t2, (KVM_ARCH_GUEST_SEPC)(a0) /* Save Host and Restore Guest SSTATUS */ csrrw t0, CSR_SSTATUS, t0 - /* Save Host and Restore Guest HSTATUS */ - csrrw t1, CSR_HSTATUS, t1 - - /* Save Host and Restore Guest SCOUNTEREN */ - csrrw t2, CSR_SCOUNTEREN, t2 - /* Save Host STVEC and change it to return path */ - csrrw t4, CSR_STVEC, t4 + csrrw t1, CSR_STVEC, t1 + + /* Restore Guest SEPC */ + csrw CSR_SEPC, t2 /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */ csrrw t3, CSR_SSCRATCH, a0 - /* Restore Guest SEPC */ - csrw CSR_SEPC, t5 - /* Store Host CSR values */ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0) - REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0) - REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0) + REG_S t1, (KVM_ARCH_HOST_STVEC)(a0) REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0) - REG_S t4, (KVM_ARCH_HOST_STVEC)(a0) +.endm +.macro RESTORE_GUEST_GPRS /* Restore Guest GPRs (except A0) */ REG_L ra, (KVM_ARCH_GUEST_RA)(a0) REG_L sp, (KVM_ARCH_GUEST_SP)(a0) @@ -107,13 +97,9 @@ SYM_FUNC_START(__kvm_riscv_switch_to) /* Restore Guest A0 */ REG_L a0, (KVM_ARCH_GUEST_A0)(a0) +.endm - /* Resume Guest */ - sret - - /* Back to Host */ - .align 2 -.Lkvm_switch_return: +.macro SAVE_GUEST_GPRS /* Swap Guest A0 with SSCRATCH */ csrrw a0, CSR_SSCRATCH, a0 @@ -148,39 +134,33 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_S t4, (KVM_ARCH_GUEST_T4)(a0) REG_S t5, (KVM_ARCH_GUEST_T5)(a0) REG_S t6, (KVM_ARCH_GUEST_T6)(a0) +.endm +.macro SAVE_GUEST_AND_RESTORE_HOST_CSRS /* Load Host CSR values */ - REG_L t1, (KVM_ARCH_HOST_STVEC)(a0) - REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0) - REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0) - REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0) - REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0) - - /* Save Guest SEPC */ - csrr t0, CSR_SEPC + REG_L t0, (KVM_ARCH_HOST_STVEC)(a0) + REG_L t1, (KVM_ARCH_HOST_SSCRATCH)(a0) + REG_L t2, (KVM_ARCH_HOST_SSTATUS)(a0) /* Save Guest A0 and Restore Host SSCRATCH */ - csrrw t2, CSR_SSCRATCH, t2 + csrrw t1, CSR_SSCRATCH, t1 - /* Restore Host STVEC */ - csrw CSR_STVEC, t1 - - /* Save Guest and Restore Host SCOUNTEREN */ - csrrw t3, CSR_SCOUNTEREN, t3 + /* Save Guest SEPC */ + csrr t3, CSR_SEPC - /* Save Guest and Restore Host HSTATUS */ - csrrw t4, CSR_HSTATUS, t4 + /* Restore Host STVEC */ + csrw CSR_STVEC, t0 /* Save Guest and Restore Host SSTATUS */ - csrrw t5, CSR_SSTATUS, t5 + csrrw t2, CSR_SSTATUS, t2 /* Store Guest CSR values */ - REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0) - REG_S t2, (KVM_ARCH_GUEST_A0)(a0) - REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0) - REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0) - REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0) + REG_S t1, (KVM_ARCH_GUEST_A0)(a0) + REG_S t2, (KVM_ARCH_GUEST_SSTATUS)(a0) + REG_S t3, (KVM_ARCH_GUEST_SEPC)(a0) +.endm +.macro RESTORE_HOST_GPRS /* Restore Host GPRs (except A0 and T0-T6) */ REG_L ra, (KVM_ARCH_HOST_RA)(a0) REG_L sp, (KVM_ARCH_HOST_SP)(a0) @@ -205,11 +185,68 @@ SYM_FUNC_START(__kvm_riscv_switch_to) REG_L s9, (KVM_ARCH_HOST_S9)(a0) REG_L s10, (KVM_ARCH_HOST_S10)(a0) REG_L s11, (KVM_ARCH_HOST_S11)(a0) +.endm + + .text + .altmacro + .option norelax + + /* + * Parameters: + * A0 <= Pointer to struct kvm_vcpu_arch + */ +SYM_FUNC_START(__kvm_riscv_switch_to) + SAVE_HOST_GPRS + + SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_switch_return + + RESTORE_GUEST_GPRS + + /* Resume Guest using SRET */ + sret + + /* Back to Host */ + .align 2 +.Lkvm_switch_return: + SAVE_GUEST_GPRS + + SAVE_GUEST_AND_RESTORE_HOST_CSRS + + RESTORE_HOST_GPRS /* Return to C code */ ret SYM_FUNC_END(__kvm_riscv_switch_to) + /* + * Parameters: + * A0 <= Pointer to struct kvm_vcpu_arch + * A1 <= SBI extension ID + * A2 <= SBI function ID + */ +SYM_FUNC_START(__kvm_riscv_nacl_switch_to) + SAVE_HOST_GPRS + + SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_nacl_switch_return + + /* Resume Guest using SBI nested acceleration */ + add a6, a2, zero + add a7, a1, zero + ecall + + /* Back to Host */ + .align 2 +.Lkvm_nacl_switch_return: + SAVE_GUEST_GPRS + + SAVE_GUEST_AND_RESTORE_HOST_CSRS + + RESTORE_HOST_GPRS + + /* Return to C code */ + ret +SYM_FUNC_END(__kvm_riscv_nacl_switch_to) + SYM_CODE_START(__kvm_riscv_unpriv_trap) /* * We assume that faulting unpriv load/store instruction is diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c index 75486b25ac45..ff672fa71fcc 100644 --- a/arch/riscv/kvm/vcpu_timer.c +++ b/arch/riscv/kvm/vcpu_timer.c @@ -11,8 +11,8 @@ #include <linux/kvm_host.h> #include <linux/uaccess.h> #include <clocksource/timer-riscv.h> -#include <asm/csr.h> #include <asm/delay.h> +#include <asm/kvm_nacl.h> #include <asm/kvm_vcpu_timer.h> static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt) @@ -72,12 +72,12 @@ static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t) static int kvm_riscv_vcpu_update_vstimecmp(struct kvm_vcpu *vcpu, u64 ncycles) { #if defined(CONFIG_32BIT) - csr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF); - csr_write(CSR_VSTIMECMPH, ncycles >> 32); + ncsr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF); + ncsr_write(CSR_VSTIMECMPH, ncycles >> 32); #else - csr_write(CSR_VSTIMECMP, ncycles); + ncsr_write(CSR_VSTIMECMP, ncycles); #endif - return 0; + return 0; } static int kvm_riscv_vcpu_update_hrtimer(struct kvm_vcpu *vcpu, u64 ncycles) @@ -248,18 +248,19 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu) if (t->init_done) return -EINVAL; - hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); t->init_done = true; t->next_set = false; /* Enable sstc for every vcpu if available in hardware */ if (riscv_isa_extension_available(NULL, SSTC)) { t->sstc_enabled = true; - t->hrt.function = kvm_riscv_vcpu_vstimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp; } else { t->sstc_enabled = false; - t->hrt.function = kvm_riscv_vcpu_hrtimer_expired; + hrtimer_setup(&t->hrt, kvm_riscv_vcpu_hrtimer_expired, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); t->timer_next_event = kvm_riscv_vcpu_update_hrtimer; } @@ -289,10 +290,10 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu) struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer; #if defined(CONFIG_32BIT) - csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); - csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); + ncsr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta)); + ncsr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32)); #else - csr_write(CSR_HTIMEDELTA, gt->time_delta); + ncsr_write(CSR_HTIMEDELTA, gt->time_delta); #endif } @@ -306,10 +307,10 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu) return; #if defined(CONFIG_32BIT) - csr_write(CSR_VSTIMECMP, (u32)t->next_cycles); - csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); + ncsr_write(CSR_VSTIMECMP, (u32)t->next_cycles); + ncsr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32)); #else - csr_write(CSR_VSTIMECMP, t->next_cycles); + ncsr_write(CSR_VSTIMECMP, t->next_cycles); #endif /* timer should be enabled for the remaining operations */ @@ -327,10 +328,10 @@ void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu) return; #if defined(CONFIG_32BIT) - t->next_cycles = csr_read(CSR_VSTIMECMP); - t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32; + t->next_cycles = ncsr_read(CSR_VSTIMECMP); + t->next_cycles |= (u64)ncsr_read(CSR_VSTIMECMPH) << 32; #else - t->next_cycles = csr_read(CSR_VSTIMECMP); + t->next_cycles = ncsr_read(CSR_VSTIMECMP); #endif } diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c index d92d1348045c..a5f88cb717f3 100644 --- a/arch/riscv/kvm/vcpu_vector.c +++ b/arch/riscv/kvm/vcpu_vector.c @@ -22,6 +22,9 @@ void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cntx = &vcpu->arch.guest_context; cntx->sstatus &= ~SR_VS; + + cntx->vector.vlenb = riscv_v_vsize / 32; + if (riscv_isa_extension_available(isa, v)) { cntx->sstatus |= SR_VS_INITIAL; WARN_ON(!cntx->vector.datap); @@ -70,13 +73,11 @@ void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx) __kvm_riscv_vector_restore(cntx); } -int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, - struct kvm_cpu_context *cntx) +int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu) { - cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL); - if (!cntx->vector.datap) + vcpu->arch.guest_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); + if (!vcpu->arch.guest_context.vector.datap) return -ENOMEM; - cntx->vector.vlenb = riscv_v_vsize / 32; vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); if (!vcpu->arch.host_context.vector.datap) @@ -87,7 +88,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu, void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu) { - kfree(vcpu->arch.guest_reset_context.vector.datap); + kfree(vcpu->arch.guest_context.vector.datap); kfree(vcpu->arch.host_context.vector.datap); } #endif diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index ce58bc48e5b8..b27ec8f96697 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: @@ -208,6 +209,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } +int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + switch (cap->cap) { + case KVM_CAP_RISCV_MP_STATE_RESET: + if (cap->flags) + return -EINVAL; + kvm->arch.mp_state_reset = true; + return 0; + default: + return -EINVAL; + } +} + int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { return -EINVAL; |