aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/riscv/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r--arch/riscv/kvm/Kconfig3
-rw-r--r--arch/riscv/kvm/Makefile30
-rw-r--r--arch/riscv/kvm/aia.c154
-rw-r--r--arch/riscv/kvm/aia_aplic.c40
-rw-r--r--arch/riscv/kvm/aia_device.c47
-rw-r--r--arch/riscv/kvm/aia_imsic.c11
-rw-r--r--arch/riscv/kvm/main.c87
-rw-r--r--arch/riscv/kvm/mmu.c33
-rw-r--r--arch/riscv/kvm/nacl.c152
-rw-r--r--arch/riscv/kvm/tlb.c57
-rw-r--r--arch/riscv/kvm/trace.h67
-rw-r--r--arch/riscv/kvm/vcpu.c345
-rw-r--r--arch/riscv/kvm/vcpu_exit.c43
-rw-r--r--arch/riscv/kvm/vcpu_insn.c15
-rw-r--r--arch/riscv/kvm/vcpu_onereg.c55
-rw-r--r--arch/riscv/kvm/vcpu_pmu.c253
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c54
-rw-r--r--arch/riscv/kvm/vcpu_sbi_hsm.c58
-rw-r--r--arch/riscv/kvm/vcpu_sbi_pmu.c17
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c23
-rw-r--r--arch/riscv/kvm/vcpu_sbi_sta.c4
-rw-r--r--arch/riscv/kvm/vcpu_sbi_system.c66
-rw-r--r--arch/riscv/kvm/vcpu_switch.S137
-rw-r--r--arch/riscv/kvm/vcpu_timer.c35
-rw-r--r--arch/riscv/kvm/vcpu_vector.c13
-rw-r--r--arch/riscv/kvm/vm.c14
26 files changed, 1401 insertions, 412 deletions
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index 26d1727f0550..704c2899197e 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -18,7 +18,7 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
config KVM
- tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)"
+ tristate "Kernel-based Virtual Machine (KVM) support"
depends on RISCV_SBI && MMU
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
@@ -32,6 +32,7 @@ config KVM
select KVM_XFER_TO_GUEST_WORK
select KVM_GENERIC_MMU_NOTIFIER
select SCHED_INFO
+ select GUEST_PERF_EVENTS if PERF_EVENTS
help
Support hosting virtualized guest machines.
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index c9646521f113..4e0bba91d284 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -3,33 +3,37 @@
# Makefile for RISC-V KVM support
#
-ccflags-y += -I $(srctree)/$(src)
+ccflags-y += -I $(src)
include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
+# Ordered alphabetically
+kvm-y += aia.o
+kvm-y += aia_aplic.o
+kvm-y += aia_device.o
+kvm-y += aia_imsic.o
kvm-y += main.o
-kvm-y += vm.o
-kvm-y += vmid.o
-kvm-y += tlb.o
kvm-y += mmu.o
+kvm-y += nacl.o
+kvm-y += tlb.o
kvm-y += vcpu.o
kvm-y += vcpu_exit.o
kvm-y += vcpu_fp.o
-kvm-y += vcpu_vector.o
kvm-y += vcpu_insn.o
kvm-y += vcpu_onereg.o
-kvm-y += vcpu_switch.o
+kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o
kvm-y += vcpu_sbi.o
-kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
kvm-y += vcpu_sbi_base.o
-kvm-y += vcpu_sbi_replace.o
kvm-y += vcpu_sbi_hsm.o
+kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_sbi_pmu.o
+kvm-y += vcpu_sbi_replace.o
kvm-y += vcpu_sbi_sta.o
+kvm-y += vcpu_sbi_system.o
+kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
+kvm-y += vcpu_switch.o
kvm-y += vcpu_timer.o
-kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o
-kvm-y += aia.o
-kvm-y += aia_device.o
-kvm-y += aia_aplic.o
-kvm-y += aia_imsic.o
+kvm-y += vcpu_vector.o
+kvm-y += vm.o
+kvm-y += vmid.o
diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c
index a944294f6f23..19afd1f23537 100644
--- a/arch/riscv/kvm/aia.c
+++ b/arch/riscv/kvm/aia.c
@@ -10,12 +10,13 @@
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/irq.h>
+#include <linux/irqchip/riscv-imsic.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <asm/cpufeature.h>
-#include <asm/kvm_aia_imsic.h>
+#include <asm/kvm_nacl.h>
struct aia_hgei_control {
raw_spinlock_t lock;
@@ -51,7 +52,7 @@ static int aia_find_hgei(struct kvm_vcpu *owner)
return hgei;
}
-static void aia_set_hvictl(bool ext_irq_pending)
+static inline unsigned long aia_hvictl_value(bool ext_irq_pending)
{
unsigned long hvictl;
@@ -62,7 +63,7 @@ static void aia_set_hvictl(bool ext_irq_pending)
hvictl = (IRQ_S_EXT << HVICTL_IID_SHIFT) & HVICTL_IID;
hvictl |= ext_irq_pending;
- csr_write(CSR_HVICTL, hvictl);
+ return hvictl;
}
#ifdef CONFIG_32BIT
@@ -88,7 +89,7 @@ void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu)
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (kvm_riscv_aia_available())
- csr->vsieh = csr_read(CSR_VSIEH);
+ csr->vsieh = ncsr_read(CSR_VSIEH);
}
#endif
@@ -115,7 +116,7 @@ bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
hgei = aia_find_hgei(vcpu);
if (hgei > 0)
- return !!(csr_read(CSR_HGEIP) & BIT(hgei));
+ return !!(ncsr_read(CSR_HGEIP) & BIT(hgei));
return false;
}
@@ -128,45 +129,73 @@ void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu)
return;
#ifdef CONFIG_32BIT
- csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph);
+ ncsr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph);
#endif
- aia_set_hvictl(!!(csr->hvip & BIT(IRQ_VS_EXT)));
+ ncsr_write(CSR_HVICTL, aia_hvictl_value(!!(csr->hvip & BIT(IRQ_VS_EXT))));
}
void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
+ void *nsh;
if (!kvm_riscv_aia_available())
return;
- csr_write(CSR_VSISELECT, csr->vsiselect);
- csr_write(CSR_HVIPRIO1, csr->hviprio1);
- csr_write(CSR_HVIPRIO2, csr->hviprio2);
+ if (kvm_riscv_nacl_sync_csr_available()) {
+ nsh = nacl_shmem();
+ nacl_csr_write(nsh, CSR_VSISELECT, csr->vsiselect);
+ nacl_csr_write(nsh, CSR_HVIPRIO1, csr->hviprio1);
+ nacl_csr_write(nsh, CSR_HVIPRIO2, csr->hviprio2);
+#ifdef CONFIG_32BIT
+ nacl_csr_write(nsh, CSR_VSIEH, csr->vsieh);
+ nacl_csr_write(nsh, CSR_HVIPH, csr->hviph);
+ nacl_csr_write(nsh, CSR_HVIPRIO1H, csr->hviprio1h);
+ nacl_csr_write(nsh, CSR_HVIPRIO2H, csr->hviprio2h);
+#endif
+ } else {
+ csr_write(CSR_VSISELECT, csr->vsiselect);
+ csr_write(CSR_HVIPRIO1, csr->hviprio1);
+ csr_write(CSR_HVIPRIO2, csr->hviprio2);
#ifdef CONFIG_32BIT
- csr_write(CSR_VSIEH, csr->vsieh);
- csr_write(CSR_HVIPH, csr->hviph);
- csr_write(CSR_HVIPRIO1H, csr->hviprio1h);
- csr_write(CSR_HVIPRIO2H, csr->hviprio2h);
+ csr_write(CSR_VSIEH, csr->vsieh);
+ csr_write(CSR_HVIPH, csr->hviph);
+ csr_write(CSR_HVIPRIO1H, csr->hviprio1h);
+ csr_write(CSR_HVIPRIO2H, csr->hviprio2h);
#endif
+ }
}
void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
+ void *nsh;
if (!kvm_riscv_aia_available())
return;
- csr->vsiselect = csr_read(CSR_VSISELECT);
- csr->hviprio1 = csr_read(CSR_HVIPRIO1);
- csr->hviprio2 = csr_read(CSR_HVIPRIO2);
+ if (kvm_riscv_nacl_available()) {
+ nsh = nacl_shmem();
+ csr->vsiselect = nacl_csr_read(nsh, CSR_VSISELECT);
+ csr->hviprio1 = nacl_csr_read(nsh, CSR_HVIPRIO1);
+ csr->hviprio2 = nacl_csr_read(nsh, CSR_HVIPRIO2);
#ifdef CONFIG_32BIT
- csr->vsieh = csr_read(CSR_VSIEH);
- csr->hviph = csr_read(CSR_HVIPH);
- csr->hviprio1h = csr_read(CSR_HVIPRIO1H);
- csr->hviprio2h = csr_read(CSR_HVIPRIO2H);
+ csr->vsieh = nacl_csr_read(nsh, CSR_VSIEH);
+ csr->hviph = nacl_csr_read(nsh, CSR_HVIPH);
+ csr->hviprio1h = nacl_csr_read(nsh, CSR_HVIPRIO1H);
+ csr->hviprio2h = nacl_csr_read(nsh, CSR_HVIPRIO2H);
+#endif
+ } else {
+ csr->vsiselect = csr_read(CSR_VSISELECT);
+ csr->hviprio1 = csr_read(CSR_HVIPRIO1);
+ csr->hviprio2 = csr_read(CSR_HVIPRIO2);
+#ifdef CONFIG_32BIT
+ csr->vsieh = csr_read(CSR_VSIEH);
+ csr->hviph = csr_read(CSR_HVIPH);
+ csr->hviprio1h = csr_read(CSR_HVIPRIO1H);
+ csr->hviprio2h = csr_read(CSR_HVIPRIO2H);
#endif
+ }
}
int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu,
@@ -250,20 +279,20 @@ static u8 aia_get_iprio8(struct kvm_vcpu *vcpu, unsigned int irq)
switch (bitpos / BITS_PER_LONG) {
case 0:
- hviprio = csr_read(CSR_HVIPRIO1);
+ hviprio = ncsr_read(CSR_HVIPRIO1);
break;
case 1:
#ifndef CONFIG_32BIT
- hviprio = csr_read(CSR_HVIPRIO2);
+ hviprio = ncsr_read(CSR_HVIPRIO2);
break;
#else
- hviprio = csr_read(CSR_HVIPRIO1H);
+ hviprio = ncsr_read(CSR_HVIPRIO1H);
break;
case 2:
- hviprio = csr_read(CSR_HVIPRIO2);
+ hviprio = ncsr_read(CSR_HVIPRIO2);
break;
case 3:
- hviprio = csr_read(CSR_HVIPRIO2H);
+ hviprio = ncsr_read(CSR_HVIPRIO2H);
break;
#endif
default:
@@ -283,20 +312,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio)
switch (bitpos / BITS_PER_LONG) {
case 0:
- hviprio = csr_read(CSR_HVIPRIO1);
+ hviprio = ncsr_read(CSR_HVIPRIO1);
break;
case 1:
#ifndef CONFIG_32BIT
- hviprio = csr_read(CSR_HVIPRIO2);
+ hviprio = ncsr_read(CSR_HVIPRIO2);
break;
#else
- hviprio = csr_read(CSR_HVIPRIO1H);
+ hviprio = ncsr_read(CSR_HVIPRIO1H);
break;
case 2:
- hviprio = csr_read(CSR_HVIPRIO2);
+ hviprio = ncsr_read(CSR_HVIPRIO2);
break;
case 3:
- hviprio = csr_read(CSR_HVIPRIO2H);
+ hviprio = ncsr_read(CSR_HVIPRIO2H);
break;
#endif
default:
@@ -308,20 +337,20 @@ static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio)
switch (bitpos / BITS_PER_LONG) {
case 0:
- csr_write(CSR_HVIPRIO1, hviprio);
+ ncsr_write(CSR_HVIPRIO1, hviprio);
break;
case 1:
#ifndef CONFIG_32BIT
- csr_write(CSR_HVIPRIO2, hviprio);
+ ncsr_write(CSR_HVIPRIO2, hviprio);
break;
#else
- csr_write(CSR_HVIPRIO1H, hviprio);
+ ncsr_write(CSR_HVIPRIO1H, hviprio);
break;
case 2:
- csr_write(CSR_HVIPRIO2, hviprio);
+ ncsr_write(CSR_HVIPRIO2, hviprio);
break;
case 3:
- csr_write(CSR_HVIPRIO2H, hviprio);
+ ncsr_write(CSR_HVIPRIO2H, hviprio);
break;
#endif
default:
@@ -377,7 +406,7 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num,
return KVM_INSN_ILLEGAL_TRAP;
/* First try to emulate in kernel space */
- isel = csr_read(CSR_VSISELECT) & ISELECT_MASK;
+ isel = ncsr_read(CSR_VSISELECT) & ISELECT_MASK;
if (isel >= ISELECT_IPRIO0 && isel <= ISELECT_IPRIO15)
return aia_rmw_iprio(vcpu, isel, val, new_val, wr_mask);
else if (isel >= IMSIC_FIRST && isel <= IMSIC_LAST &&
@@ -394,6 +423,8 @@ int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
{
int ret = -ENOENT;
unsigned long flags;
+ const struct imsic_global_config *gc;
+ const struct imsic_local_config *lc;
struct aia_hgei_control *hgctrl = per_cpu_ptr(&aia_hgei, cpu);
if (!kvm_riscv_aia_available() || !hgctrl)
@@ -409,11 +440,14 @@ int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner,
raw_spin_unlock_irqrestore(&hgctrl->lock, flags);
- /* TODO: To be updated later by AIA IMSIC HW guest file support */
- if (hgei_va)
- *hgei_va = NULL;
- if (hgei_pa)
- *hgei_pa = 0;
+ gc = imsic_get_global_config();
+ lc = (gc) ? per_cpu_ptr(gc->local, cpu) : NULL;
+ if (lc && ret > 0) {
+ if (hgei_va)
+ *hgei_va = lc->msi_va + (ret * IMSIC_MMIO_PAGE_SZ);
+ if (hgei_pa)
+ *hgei_pa = lc->msi_pa + (ret * IMSIC_MMIO_PAGE_SZ);
+ }
return ret;
}
@@ -494,6 +528,10 @@ static int aia_hgei_init(void)
hgctrl->free_bitmap = 0;
}
+ /* Skip SGEI interrupt setup for zero guest external interrupts */
+ if (!kvm_riscv_aia_nr_hgei)
+ goto skip_sgei_interrupt;
+
/* Find INTC irq domain */
domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(),
DOMAIN_BUS_ANY);
@@ -517,11 +555,16 @@ static int aia_hgei_init(void)
return rc;
}
+skip_sgei_interrupt:
return 0;
}
static void aia_hgei_exit(void)
{
+ /* Do nothing for zero guest external interrupts */
+ if (!kvm_riscv_aia_nr_hgei)
+ return;
+
/* Free per-CPU SGEI interrupt */
free_percpu_irq(hgei_parent_irq, &aia_hgei);
}
@@ -531,7 +574,7 @@ void kvm_riscv_aia_enable(void)
if (!kvm_riscv_aia_available())
return;
- aia_set_hvictl(false);
+ csr_write(CSR_HVICTL, aia_hvictl_value(false));
csr_write(CSR_HVIPRIO1, 0x0);
csr_write(CSR_HVIPRIO2, 0x0);
#ifdef CONFIG_32BIT
@@ -545,6 +588,9 @@ void kvm_riscv_aia_enable(void)
enable_percpu_irq(hgei_parent_irq,
irq_get_trigger_type(hgei_parent_irq));
csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+ /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_set(CSR_HVIEN, BIT(IRQ_PMU_OVF));
}
void kvm_riscv_aia_disable(void)
@@ -558,11 +604,13 @@ void kvm_riscv_aia_disable(void)
return;
hgctrl = get_cpu_ptr(&aia_hgei);
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF));
/* Disable per-CPU SGEI interrupt */
csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
disable_percpu_irq(hgei_parent_irq);
- aia_set_hvictl(false);
+ csr_write(CSR_HVICTL, aia_hvictl_value(false));
raw_spin_lock_irqsave(&hgctrl->lock, flags);
@@ -600,9 +648,11 @@ void kvm_riscv_aia_disable(void)
int kvm_riscv_aia_init(void)
{
int rc;
+ const struct imsic_global_config *gc;
if (!riscv_isa_extension_available(NULL, SxAIA))
return -ENODEV;
+ gc = imsic_get_global_config();
/* Figure-out number of bits in HGEIE */
csr_write(CSR_HGEIE, -1UL);
@@ -614,17 +664,17 @@ int kvm_riscv_aia_init(void)
/*
* Number of usable HGEI lines should be minimum of per-HART
* IMSIC guest files and number of bits in HGEIE
- *
- * TODO: To be updated later by AIA IMSIC HW guest file support
*/
- kvm_riscv_aia_nr_hgei = 0;
+ if (gc)
+ kvm_riscv_aia_nr_hgei = min((ulong)kvm_riscv_aia_nr_hgei,
+ BIT(gc->guest_index_bits) - 1);
+ else
+ kvm_riscv_aia_nr_hgei = 0;
- /*
- * Find number of guest MSI IDs
- *
- * TODO: To be updated later by AIA IMSIC HW guest file support
- */
+ /* Find number of guest MSI IDs */
kvm_riscv_aia_max_ids = IMSIC_MAX_ID;
+ if (gc && kvm_riscv_aia_nr_hgei)
+ kvm_riscv_aia_max_ids = gc->nr_guest_ids + 1;
/* Initialize guest external interrupt line management */
rc = aia_hgei_init();
diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c
index 39e72aa016a4..f59d1c0c8c43 100644
--- a/arch/riscv/kvm/aia_aplic.c
+++ b/arch/riscv/kvm/aia_aplic.c
@@ -7,12 +7,12 @@
* Anup Patel <apatel@ventanamicro.com>
*/
+#include <linux/irqchip/riscv-aplic.h>
#include <linux/kvm_host.h>
#include <linux/math.h>
#include <linux/spinlock.h>
#include <linux/swab.h>
#include <kvm/iodev.h>
-#include <asm/kvm_aia_aplic.h>
struct aplic_irq {
raw_spinlock_t lock;
@@ -137,11 +137,22 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
raw_spin_lock_irqsave(&irqd->lock, flags);
sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
- if (!pending &&
- ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
- (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
goto skip_write_pending;
+ if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
+ if (!pending)
+ goto noskip_write_pending;
+ if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
+ goto skip_write_pending;
+ if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+ sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
+ goto skip_write_pending;
+ }
+
+noskip_write_pending:
if (pending)
irqd->state |= APLIC_IRQ_STATE_PENDING;
else
@@ -187,16 +198,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
static bool aplic_read_input(struct aplic *aplic, u32 irq)
{
- bool ret;
- unsigned long flags;
+ u32 sourcecfg, sm, raw_input, irq_inverted;
struct aplic_irq *irqd;
+ unsigned long flags;
+ bool ret = false;
if (!irq || aplic->nr_irqs <= irq)
return false;
irqd = &aplic->irqs[irq];
raw_spin_lock_irqsave(&irqd->lock, flags);
- ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+
+ sourcecfg = irqd->sourcecfg;
+ if (sourcecfg & APLIC_SOURCECFG_D)
+ goto skip;
+
+ sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+ if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+ goto skip;
+
+ raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
+ irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+ sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+ ret = !!(raw_input ^ irq_inverted);
+
+skip:
raw_spin_unlock_irqrestore(&irqd->lock, flags);
return ret;
diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 0eb689351b7d..806c41931cde 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -8,39 +8,9 @@
*/
#include <linux/bits.h>
+#include <linux/irqchip/riscv-imsic.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
-#include <asm/kvm_aia_imsic.h>
-
-static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
-{
- struct kvm_vcpu *tmp_vcpu;
-
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&tmp_vcpu->mutex);
- }
-}
-
-static void unlock_all_vcpus(struct kvm *kvm)
-{
- unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
-}
-
-static bool lock_all_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *tmp_vcpu;
- unsigned long c;
-
- kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
- if (!mutex_trylock(&tmp_vcpu->mutex)) {
- unlock_vcpus(kvm, c - 1);
- return false;
- }
- }
-
- return true;
-}
static int aia_create(struct kvm_device *dev, u32 type)
{
@@ -53,7 +23,7 @@ static int aia_create(struct kvm_device *dev, u32 type)
return -EEXIST;
ret = -EBUSY;
- if (!lock_all_vcpus(kvm))
+ if (kvm_trylock_all_vcpus(kvm))
return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -65,7 +35,7 @@ static int aia_create(struct kvm_device *dev, u32 type)
kvm->arch.aia.in_kernel = true;
out_unlock:
- unlock_all_vcpus(kvm);
+ kvm_unlock_all_vcpus(kvm);
return ret;
}
@@ -237,10 +207,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
{
- u32 hart, group = 0;
+ u32 hart = 0, group = 0;
- hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
- GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+ if (aia->nr_hart_bits)
+ hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+ GENMASK_ULL(aia->nr_hart_bits - 1, 0);
if (aia->nr_group_bits)
group = (addr >> aia->nr_group_shift) &
GENMASK_ULL(aia->nr_group_bits - 1, 0);
@@ -525,12 +496,10 @@ int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
- struct kvm_vcpu_aia_csr *reset_csr =
- &vcpu->arch.aia_context.guest_reset_csr;
if (!kvm_riscv_aia_available())
return;
- memcpy(csr, reset_csr, sizeof(*csr));
+ memset(csr, 0, sizeof(*csr));
/* Proceed only if AIA was initialized successfully */
if (!kvm_riscv_aia_initialized(vcpu->kvm))
diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c
index e808723a85f1..29ef9c2133a9 100644
--- a/arch/riscv/kvm/aia_imsic.c
+++ b/arch/riscv/kvm/aia_imsic.c
@@ -9,13 +9,13 @@
#include <linux/atomic.h>
#include <linux/bitmap.h>
+#include <linux/irqchip/riscv-imsic.h>
#include <linux/kvm_host.h>
#include <linux/math.h>
#include <linux/spinlock.h>
#include <linux/swab.h>
#include <kvm/iodev.h>
#include <asm/csr.h>
-#include <asm/kvm_aia_imsic.h>
#define IMSIC_MAX_EIX (IMSIC_MAX_ID / BITS_PER_TYPE(u64))
@@ -55,7 +55,7 @@ struct imsic {
/* IMSIC SW-file */
struct imsic_mrif *swfile;
phys_addr_t swfile_pa;
- spinlock_t swfile_extirq_lock;
+ raw_spinlock_t swfile_extirq_lock;
};
#define imsic_vs_csr_read(__c) \
@@ -622,7 +622,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
* interruptions between reading topei and updating pending status.
*/
- spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
+ raw_spin_lock_irqsave(&imsic->swfile_extirq_lock, flags);
if (imsic_mrif_atomic_read(mrif, &mrif->eidelivery) &&
imsic_mrif_topei(mrif, imsic->nr_eix, imsic->nr_msis))
@@ -630,7 +630,7 @@ static void imsic_swfile_extirq_update(struct kvm_vcpu *vcpu)
else
kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_EXT);
- spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
+ raw_spin_unlock_irqrestore(&imsic->swfile_extirq_lock, flags);
}
static void imsic_swfile_read(struct kvm_vcpu *vcpu, bool clear,
@@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
if (imsic->vsfile_cpu >= 0) {
writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE);
- kvm_vcpu_kick(vcpu);
} else {
eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)];
set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip);
@@ -1051,7 +1050,7 @@ int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu)
}
imsic->swfile = page_to_virt(swfile_page);
imsic->swfile_pa = page_to_phys(swfile_page);
- spin_lock_init(&imsic->swfile_extirq_lock);
+ raw_spin_lock_init(&imsic->swfile_extirq_lock);
/* Setup IO device */
kvm_iodevice_init(&imsic->iodev, &imsic_iodoev_ops);
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 225a435d9c9a..4b24705dc63a 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -10,8 +10,8 @@
#include <linux/err.h>
#include <linux/module.h>
#include <linux/kvm_host.h>
-#include <asm/csr.h>
#include <asm/cpufeature.h>
+#include <asm/kvm_nacl.h>
#include <asm/sbi.h>
long kvm_arch_dev_ioctl(struct file *filp,
@@ -20,24 +20,16 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_arch_hardware_enable(void)
+int kvm_arch_enable_virtualization_cpu(void)
{
- unsigned long hideleg, hedeleg;
-
- hedeleg = 0;
- hedeleg |= (1UL << EXC_INST_MISALIGNED);
- hedeleg |= (1UL << EXC_BREAKPOINT);
- hedeleg |= (1UL << EXC_SYSCALL);
- hedeleg |= (1UL << EXC_INST_PAGE_FAULT);
- hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT);
- hedeleg |= (1UL << EXC_STORE_PAGE_FAULT);
- csr_write(CSR_HEDELEG, hedeleg);
-
- hideleg = 0;
- hideleg |= (1UL << IRQ_VS_SOFT);
- hideleg |= (1UL << IRQ_VS_TIMER);
- hideleg |= (1UL << IRQ_VS_EXT);
- csr_write(CSR_HIDELEG, hideleg);
+ int rc;
+
+ rc = kvm_riscv_nacl_enable();
+ if (rc)
+ return rc;
+
+ csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
+ csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);
/* VS should access only the time counter directly. Everything else should trap */
csr_write(CSR_HCOUNTEREN, 0x02);
@@ -49,7 +41,7 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-void kvm_arch_hardware_disable(void)
+void kvm_arch_disable_virtualization_cpu(void)
{
kvm_riscv_aia_disable();
@@ -63,11 +55,21 @@ void kvm_arch_hardware_disable(void)
csr_write(CSR_HVIP, 0);
csr_write(CSR_HEDELEG, 0);
csr_write(CSR_HIDELEG, 0);
+
+ kvm_riscv_nacl_disable();
+}
+
+static void kvm_riscv_teardown(void)
+{
+ kvm_riscv_aia_exit();
+ kvm_riscv_nacl_exit();
+ kvm_unregister_perf_callbacks();
}
static int __init riscv_kvm_init(void)
{
int rc;
+ char slist[64];
const char *str;
if (!riscv_isa_extension_available(NULL, h)) {
@@ -85,16 +87,53 @@ static int __init riscv_kvm_init(void)
return -ENODEV;
}
+ rc = kvm_riscv_nacl_init();
+ if (rc && rc != -ENODEV)
+ return rc;
+
kvm_riscv_gstage_mode_detect();
kvm_riscv_gstage_vmid_detect();
rc = kvm_riscv_aia_init();
- if (rc && rc != -ENODEV)
+ if (rc && rc != -ENODEV) {
+ kvm_riscv_nacl_exit();
return rc;
+ }
kvm_info("hypervisor extension available\n");
+ if (kvm_riscv_nacl_available()) {
+ rc = 0;
+ slist[0] = '\0';
+ if (kvm_riscv_nacl_sync_csr_available()) {
+ if (rc)
+ strcat(slist, ", ");
+ strcat(slist, "sync_csr");
+ rc++;
+ }
+ if (kvm_riscv_nacl_sync_hfence_available()) {
+ if (rc)
+ strcat(slist, ", ");
+ strcat(slist, "sync_hfence");
+ rc++;
+ }
+ if (kvm_riscv_nacl_sync_sret_available()) {
+ if (rc)
+ strcat(slist, ", ");
+ strcat(slist, "sync_sret");
+ rc++;
+ }
+ if (kvm_riscv_nacl_autoswap_csr_available()) {
+ if (rc)
+ strcat(slist, ", ");
+ strcat(slist, "autoswap_csr");
+ rc++;
+ }
+ kvm_info("using SBI nested acceleration with %s\n",
+ (rc) ? slist : "no features");
+ }
+
switch (kvm_riscv_gstage_mode()) {
case HGATP_MODE_SV32X4:
str = "Sv32x4";
@@ -119,9 +158,11 @@ static int __init riscv_kvm_init(void)
kvm_info("AIA available with %d guest external interrupts\n",
kvm_riscv_aia_nr_hgei);
+ kvm_register_perf_callbacks(NULL);
+
rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
if (rc) {
- kvm_riscv_aia_exit();
+ kvm_riscv_teardown();
return rc;
}
@@ -131,8 +172,8 @@ module_init(riscv_kvm_init);
static void __exit riscv_kvm_exit(void)
{
- kvm_riscv_aia_exit();
-
kvm_exit();
+
+ kvm_riscv_teardown();
}
module_exit(riscv_kvm_exit);
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index a9e2fd7245e1..1087ea74567b 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -15,7 +15,7 @@
#include <linux/vmalloc.h>
#include <linux/kvm_host.h>
#include <linux/sched/signal.h>
-#include <asm/csr.h>
+#include <asm/kvm_nacl.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- int ret;
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
- if (!kvm->arch.pgd)
- return false;
-
- WARN_ON(range->end - range->start != 1);
-
- ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
- __pfn_to_phys(pfn), PAGE_SIZE, true, true);
- if (ret) {
- kvm_debug("Failed to map G-stage page (error %d)\n", ret);
- return true;
- }
-
- return false;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
pte_t *ptep;
@@ -621,6 +601,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
bool logging = (memslot->dirty_bitmap &&
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
unsigned long vma_pagesize, mmu_seq;
+ struct page *page;
/* We need minimum second+third level pages */
ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels);
@@ -651,7 +632,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
/*
* Read mmu_invalidate_seq so that KVM can detect if the results of
- * vma_lookup() or gfn_to_pfn_prot() become stale priort to acquiring
+ * vma_lookup() or __kvm_faultin_pfn() become stale prior to acquiring
* kvm->mmu_lock.
*
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
@@ -667,7 +648,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
return -EFAULT;
}
- hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable);
+ hfn = kvm_faultin_pfn(vcpu, gfn, is_write, &writable, &page);
if (hfn == KVM_PFN_ERR_HWPOISON) {
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
vma_pageshift, current);
@@ -689,7 +670,6 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
goto out_unlock;
if (writable) {
- kvm_set_pfn_dirty(hfn);
mark_page_dirty(kvm, gfn);
ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
vma_pagesize, false, true);
@@ -702,9 +682,8 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
kvm_err("Failed to map in G-stage\n");
out_unlock:
+ kvm_release_faultin_page(kvm, page, ret && ret != -EEXIST, writable);
spin_unlock(&kvm->mmu_lock);
- kvm_set_pfn_accessed(hfn);
- kvm_release_pfn_clean(hfn);
return ret;
}
@@ -752,7 +731,7 @@ void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu)
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID;
hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN;
- csr_write(CSR_HGATP, hgatp);
+ ncsr_write(CSR_HGATP, hgatp);
if (!kvm_riscv_gstage_vmid_bits())
kvm_riscv_local_hfence_gvma_all();
diff --git a/arch/riscv/kvm/nacl.c b/arch/riscv/kvm/nacl.c
new file mode 100644
index 000000000000..08a95ad9ada2
--- /dev/null
+++ b/arch/riscv/kvm/nacl.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 Ventana Micro Systems Inc.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/vmalloc.h>
+#include <asm/kvm_nacl.h>
+
+DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_available);
+DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available);
+DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available);
+DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available);
+DEFINE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available);
+DEFINE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl);
+
+void __kvm_riscv_nacl_hfence(void *shmem,
+ unsigned long control,
+ unsigned long page_num,
+ unsigned long page_count)
+{
+ int i, ent = -1, try_count = 5;
+ unsigned long *entp;
+
+again:
+ for (i = 0; i < SBI_NACL_SHMEM_HFENCE_ENTRY_MAX; i++) {
+ entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i);
+ if (lelong_to_cpu(*entp) & SBI_NACL_SHMEM_HFENCE_CONFIG_PEND)
+ continue;
+
+ ent = i;
+ break;
+ }
+
+ if (ent < 0) {
+ if (try_count) {
+ nacl_sync_hfence(-1UL);
+ goto again;
+ } else {
+ pr_warn("KVM: No free entry in NACL shared memory\n");
+ return;
+ }
+ }
+
+ entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_CONFIG(i);
+ *entp = cpu_to_lelong(control);
+ entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PNUM(i);
+ *entp = cpu_to_lelong(page_num);
+ entp = shmem + SBI_NACL_SHMEM_HFENCE_ENTRY_PCOUNT(i);
+ *entp = cpu_to_lelong(page_count);
+}
+
+int kvm_riscv_nacl_enable(void)
+{
+ int rc;
+ struct sbiret ret;
+ struct kvm_riscv_nacl *nacl;
+
+ if (!kvm_riscv_nacl_available())
+ return 0;
+ nacl = this_cpu_ptr(&kvm_riscv_nacl);
+
+ ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM,
+ nacl->shmem_phys, 0, 0, 0, 0, 0);
+ rc = sbi_err_map_linux_errno(ret.error);
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+void kvm_riscv_nacl_disable(void)
+{
+ if (!kvm_riscv_nacl_available())
+ return;
+
+ sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SET_SHMEM,
+ SBI_SHMEM_DISABLE, SBI_SHMEM_DISABLE, 0, 0, 0, 0);
+}
+
+void kvm_riscv_nacl_exit(void)
+{
+ int cpu;
+ struct kvm_riscv_nacl *nacl;
+
+ if (!kvm_riscv_nacl_available())
+ return;
+
+ /* Allocate per-CPU shared memory */
+ for_each_possible_cpu(cpu) {
+ nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu);
+ if (!nacl->shmem)
+ continue;
+
+ free_pages((unsigned long)nacl->shmem,
+ get_order(SBI_NACL_SHMEM_SIZE));
+ nacl->shmem = NULL;
+ nacl->shmem_phys = 0;
+ }
+}
+
+static long nacl_probe_feature(long feature_id)
+{
+ struct sbiret ret;
+
+ if (!kvm_riscv_nacl_available())
+ return 0;
+
+ ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_PROBE_FEATURE,
+ feature_id, 0, 0, 0, 0, 0);
+ return ret.value;
+}
+
+int kvm_riscv_nacl_init(void)
+{
+ int cpu;
+ struct page *shmem_page;
+ struct kvm_riscv_nacl *nacl;
+
+ if (sbi_spec_version < sbi_mk_version(1, 0) ||
+ sbi_probe_extension(SBI_EXT_NACL) <= 0)
+ return -ENODEV;
+
+ /* Enable NACL support */
+ static_branch_enable(&kvm_riscv_nacl_available);
+
+ /* Probe NACL features */
+ if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_CSR))
+ static_branch_enable(&kvm_riscv_nacl_sync_csr_available);
+ if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_HFENCE))
+ static_branch_enable(&kvm_riscv_nacl_sync_hfence_available);
+ if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_SRET))
+ static_branch_enable(&kvm_riscv_nacl_sync_sret_available);
+ if (nacl_probe_feature(SBI_NACL_FEAT_AUTOSWAP_CSR))
+ static_branch_enable(&kvm_riscv_nacl_autoswap_csr_available);
+
+ /* Allocate per-CPU shared memory */
+ for_each_possible_cpu(cpu) {
+ nacl = per_cpu_ptr(&kvm_riscv_nacl, cpu);
+
+ shmem_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(SBI_NACL_SHMEM_SIZE));
+ if (!shmem_page) {
+ kvm_riscv_nacl_exit();
+ return -ENOMEM;
+ }
+ nacl->shmem = page_to_virt(shmem_page);
+ nacl->shmem_phys = page_to_phys(shmem_page);
+ }
+
+ return 0;
+}
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
index 23c0e82b5103..2f91ea5f8493 100644
--- a/arch/riscv/kvm/tlb.c
+++ b/arch/riscv/kvm/tlb.c
@@ -14,6 +14,7 @@
#include <asm/csr.h>
#include <asm/cpufeature.h>
#include <asm/insn-def.h>
+#include <asm/kvm_nacl.h>
#define has_svinval() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
@@ -186,18 +187,24 @@ void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu)
{
- struct kvm_vmid *vmid;
+ struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
+ unsigned long vmid = READ_ONCE(v->vmid);
- vmid = &vcpu->kvm->arch.vmid;
- kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid));
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_gvma_vmid_all(nacl_shmem(), vmid);
+ else
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
}
void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu)
{
- struct kvm_vmid *vmid;
+ struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
+ unsigned long vmid = READ_ONCE(v->vmid);
- vmid = &vcpu->kvm->arch.vmid;
- kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid));
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_vvma_all(nacl_shmem(), vmid);
+ else
+ kvm_riscv_local_hfence_vvma_all(vmid);
}
static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu,
@@ -251,6 +258,7 @@ static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu,
void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
{
+ unsigned long vmid;
struct kvm_riscv_hfence d = { 0 };
struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
@@ -259,26 +267,41 @@ void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
case KVM_RISCV_HFENCE_UNKNOWN:
break;
case KVM_RISCV_HFENCE_GVMA_VMID_GPA:
- kvm_riscv_local_hfence_gvma_vmid_gpa(
- READ_ONCE(v->vmid),
- d.addr, d.size, d.order);
+ vmid = READ_ONCE(v->vmid);
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_gvma_vmid(nacl_shmem(), vmid,
+ d.addr, d.size, d.order);
+ else
+ kvm_riscv_local_hfence_gvma_vmid_gpa(vmid, d.addr,
+ d.size, d.order);
break;
case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
- kvm_riscv_local_hfence_vvma_asid_gva(
- READ_ONCE(v->vmid), d.asid,
- d.addr, d.size, d.order);
+ vmid = READ_ONCE(v->vmid);
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_vvma_asid(nacl_shmem(), vmid, d.asid,
+ d.addr, d.size, d.order);
+ else
+ kvm_riscv_local_hfence_vvma_asid_gva(vmid, d.asid, d.addr,
+ d.size, d.order);
break;
case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
- kvm_riscv_local_hfence_vvma_asid_all(
- READ_ONCE(v->vmid), d.asid);
+ vmid = READ_ONCE(v->vmid);
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_vvma_asid_all(nacl_shmem(), vmid, d.asid);
+ else
+ kvm_riscv_local_hfence_vvma_asid_all(vmid, d.asid);
break;
case KVM_RISCV_HFENCE_VVMA_GVA:
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD);
- kvm_riscv_local_hfence_vvma_gva(
- READ_ONCE(v->vmid),
- d.addr, d.size, d.order);
+ vmid = READ_ONCE(v->vmid);
+ if (kvm_riscv_nacl_available())
+ nacl_hfence_vvma(nacl_shmem(), vmid,
+ d.addr, d.size, d.order);
+ else
+ kvm_riscv_local_hfence_vvma_gva(vmid, d.addr,
+ d.size, d.order);
break;
default:
break;
diff --git a/arch/riscv/kvm/trace.h b/arch/riscv/kvm/trace.h
new file mode 100644
index 000000000000..3d54175d805c
--- /dev/null
+++ b/arch/riscv/kvm/trace.h
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tracepoints for RISC-V KVM
+ *
+ * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ */
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+TRACE_EVENT(kvm_entry,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, pc)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = vcpu->arch.guest_context.sepc;
+ ),
+
+ TP_printk("PC: 0x016%lx", __entry->pc)
+);
+
+TRACE_EVENT(kvm_exit,
+ TP_PROTO(struct kvm_cpu_trap *trap),
+ TP_ARGS(trap),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, sepc)
+ __field(unsigned long, scause)
+ __field(unsigned long, stval)
+ __field(unsigned long, htval)
+ __field(unsigned long, htinst)
+ ),
+
+ TP_fast_assign(
+ __entry->sepc = trap->sepc;
+ __entry->scause = trap->scause;
+ __entry->stval = trap->stval;
+ __entry->htval = trap->htval;
+ __entry->htinst = trap->htinst;
+ ),
+
+ TP_printk("SEPC:0x%lx, SCAUSE:0x%lx, STVAL:0x%lx, HTVAL:0x%lx, HTINST:0x%lx",
+ __entry->sepc,
+ __entry->scause,
+ __entry->stval,
+ __entry->htval,
+ __entry->htinst)
+);
+
+#endif /* _TRACE_RSICV_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index b5ca9f2e98ac..e0a01af426ff 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -17,20 +17,29 @@
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
-#include <asm/csr.h>
#include <asm/cacheflush.h>
+#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_vector.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
KVM_GENERIC_VCPU_STATS(),
STATS_DESC_COUNTER(VCPU, ecall_exit_stat),
STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
+ STATS_DESC_COUNTER(VCPU, wrs_exit_stat),
STATS_DESC_COUNTER(VCPU, mmio_exit_user),
STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
STATS_DESC_COUNTER(VCPU, csr_exit_user),
STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
STATS_DESC_COUNTER(VCPU, signal_exits),
- STATS_DESC_COUNTER(VCPU, exits)
+ STATS_DESC_COUNTER(VCPU, exits),
+ STATS_DESC_COUNTER(VCPU, instr_illegal_exits),
+ STATS_DESC_COUNTER(VCPU, load_misaligned_exits),
+ STATS_DESC_COUNTER(VCPU, store_misaligned_exits),
+ STATS_DESC_COUNTER(VCPU, load_access_exits),
+ STATS_DESC_COUNTER(VCPU, store_access_exits),
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
@@ -42,12 +51,33 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
sizeof(kvm_vcpu_stats_desc),
};
-static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
+static void kvm_riscv_vcpu_context_reset(struct kvm_vcpu *vcpu,
+ bool kvm_sbi_reset)
{
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
- struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
- struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+ void *vector_datap = cntx->vector.datap;
+
+ memset(cntx, 0, sizeof(*cntx));
+ memset(csr, 0, sizeof(*csr));
+ memset(&vcpu->arch.smstateen_csr, 0, sizeof(vcpu->arch.smstateen_csr));
+
+ /* Restore datap as it's not a part of the guest context. */
+ cntx->vector.datap = vector_datap;
+
+ if (kvm_sbi_reset)
+ kvm_riscv_vcpu_sbi_load_reset_state(vcpu);
+
+ /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
+ cntx->sstatus = SR_SPP | SR_SPIE;
+
+ cntx->hstatus |= HSTATUS_VTW;
+ cntx->hstatus |= HSTATUS_SPVP;
+ cntx->hstatus |= HSTATUS_SPV;
+}
+
+static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu, bool kvm_sbi_reset)
+{
bool loaded;
/**
@@ -62,9 +92,7 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
vcpu->arch.last_exit_cpu = -1;
- memcpy(csr, reset_csr, sizeof(*csr));
-
- memcpy(cntx, reset_cntx, sizeof(*cntx));
+ kvm_riscv_vcpu_context_reset(vcpu, kvm_sbi_reset);
kvm_riscv_vcpu_fp_reset(vcpu);
@@ -99,8 +127,8 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
int rc;
- struct kvm_cpu_context *cntx;
- struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+
+ spin_lock_init(&vcpu->arch.mp_state_lock);
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
@@ -118,20 +146,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
/* Setup VCPU hfence queue */
spin_lock_init(&vcpu->arch.hfence_lock);
- /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
- cntx = &vcpu->arch.guest_reset_context;
- cntx->sstatus = SR_SPP | SR_SPIE;
- cntx->hstatus = 0;
- cntx->hstatus |= HSTATUS_VTW;
- cntx->hstatus |= HSTATUS_SPVP;
- cntx->hstatus |= HSTATUS_SPV;
+ spin_lock_init(&vcpu->arch.reset_state.lock);
- if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx))
+ if (kvm_riscv_vcpu_alloc_vector_context(vcpu))
return -ENOMEM;
- /* By default, make CY, TM, and IR counters accessible in VU mode */
- reset_csr->scounteren = 0x7;
-
/* Setup VCPU timer */
kvm_riscv_vcpu_timer_init(vcpu);
@@ -150,7 +169,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_riscv_vcpu_sbi_init(vcpu);
/* Reset VCPU */
- kvm_riscv_reset_vcpu(vcpu);
+ kvm_riscv_reset_vcpu(vcpu, false);
return 0;
}
@@ -201,7 +220,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) &&
- !vcpu->arch.power_off && !vcpu->arch.pause);
+ !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -214,6 +233,13 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false;
}
+#ifdef CONFIG_GUEST_PERF_EVENTS
+unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.guest_context.sepc;
+}
+#endif
+
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
@@ -349,10 +375,10 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
/* Read current HVIP and VSIE CSRs */
- csr->vsie = csr_read(CSR_VSIE);
+ csr->vsie = ncsr_read(CSR_VSIE);
/* Sync-up HVIP.VSSIP bit changes does by Guest */
- hvip = csr_read(CSR_HVIP);
+ hvip = ncsr_read(CSR_HVIP);
if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) {
if (hvip & (1UL << IRQ_VS_SOFT)) {
if (!test_and_set_bit(IRQ_VS_SOFT,
@@ -365,6 +391,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
}
}
+ /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */
+ if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) {
+ if (!(hvip & (1UL << IRQ_PMU_OVF)) &&
+ !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask))
+ clear_bit(IRQ_PMU_OVF, v->irqs_pending);
+ }
+
/* Sync-up AIA high interrupts */
kvm_riscv_vcpu_aia_sync_interrupts(vcpu);
@@ -382,7 +415,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
set_bit(irq, vcpu->arch.irqs_pending);
@@ -397,14 +431,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
/*
- * We only allow VS-mode software, timer, and external
+ * We only allow VS-mode software, timer, counter overflow and external
* interrupts when irq is one of the local interrupts
* defined by RISC-V privilege specification.
*/
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
clear_bit(irq, vcpu->arch.irqs_pending);
@@ -429,26 +464,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask)
return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask);
}
-void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
+void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = true;
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
kvm_make_request(KVM_REQ_SLEEP, vcpu);
kvm_vcpu_kick(vcpu);
}
-void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu)
{
- vcpu->arch.power_off = false;
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_riscv_vcpu_power_off(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+ WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE);
kvm_vcpu_wake_up(vcpu);
}
+void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.mp_state_lock);
+ __kvm_riscv_vcpu_power_on(vcpu);
+ spin_unlock(&vcpu->arch.mp_state_lock);
+}
+
+bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+ return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- if (vcpu->arch.power_off)
- mp_state->mp_state = KVM_MP_STATE_STOPPED;
- else
- mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+ *mp_state = READ_ONCE(vcpu->arch.mp_state);
return 0;
}
@@ -458,25 +509,42 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
+ spin_lock(&vcpu->arch.mp_state_lock);
+
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.power_off = false;
+ WRITE_ONCE(vcpu->arch.mp_state, *mp_state);
break;
case KVM_MP_STATE_STOPPED:
- kvm_riscv_vcpu_power_off(vcpu);
+ __kvm_riscv_vcpu_power_off(vcpu);
+ break;
+ case KVM_MP_STATE_INIT_RECEIVED:
+ if (vcpu->kvm->arch.mp_state_reset)
+ kvm_riscv_reset_vcpu(vcpu, false);
+ else
+ ret = -EINVAL;
break;
default:
ret = -EINVAL;
}
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
return ret;
}
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- /* TODO; To be implemented later. */
- return -EINVAL;
+ if (dbg->control & KVM_GUESTDBG_ENABLE) {
+ vcpu->guest_debug = dbg->control;
+ vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT);
+ } else {
+ vcpu->guest_debug = 0;
+ vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT);
+ }
+
+ return 0;
}
static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
@@ -496,6 +564,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
if (riscv_isa_extension_available(isa, ZICBOZ))
cfg->henvcfg |= ENVCFG_CBZE;
+ if (riscv_isa_extension_available(isa, SVADU) &&
+ !riscv_isa_extension_available(isa, SVADE))
+ cfg->henvcfg |= ENVCFG_ADUE;
+
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
cfg->hstateen0 |= SMSTATEEN0_HSENVCFG;
if (riscv_isa_extension_available(isa, SSAIA))
@@ -505,29 +577,57 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu)
if (riscv_isa_extension_available(isa, SMSTATEEN))
cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0;
}
+
+ cfg->hedeleg = KVM_HEDELEG_DEFAULT;
+ if (vcpu->guest_debug)
+ cfg->hedeleg &= ~BIT(EXC_BREAKPOINT);
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
+ void *nsh;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
- csr_write(CSR_VSSTATUS, csr->vsstatus);
- csr_write(CSR_VSIE, csr->vsie);
- csr_write(CSR_VSTVEC, csr->vstvec);
- csr_write(CSR_VSSCRATCH, csr->vsscratch);
- csr_write(CSR_VSEPC, csr->vsepc);
- csr_write(CSR_VSCAUSE, csr->vscause);
- csr_write(CSR_VSTVAL, csr->vstval);
- csr_write(CSR_HVIP, csr->hvip);
- csr_write(CSR_VSATP, csr->vsatp);
- csr_write(CSR_HENVCFG, cfg->henvcfg);
- if (IS_ENABLED(CONFIG_32BIT))
- csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
- if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
- csr_write(CSR_HSTATEEN0, cfg->hstateen0);
+ if (kvm_riscv_nacl_sync_csr_available()) {
+ nsh = nacl_shmem();
+ nacl_csr_write(nsh, CSR_VSSTATUS, csr->vsstatus);
+ nacl_csr_write(nsh, CSR_VSIE, csr->vsie);
+ nacl_csr_write(nsh, CSR_VSTVEC, csr->vstvec);
+ nacl_csr_write(nsh, CSR_VSSCRATCH, csr->vsscratch);
+ nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc);
+ nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause);
+ nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval);
+ nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg);
+ nacl_csr_write(nsh, CSR_HVIP, csr->hvip);
+ nacl_csr_write(nsh, CSR_VSATP, csr->vsatp);
+ nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg);
+ if (IS_ENABLED(CONFIG_32BIT))
+ nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
+ nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0);
+ if (IS_ENABLED(CONFIG_32BIT))
+ nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
+ }
+ } else {
+ csr_write(CSR_VSSTATUS, csr->vsstatus);
+ csr_write(CSR_VSIE, csr->vsie);
+ csr_write(CSR_VSTVEC, csr->vstvec);
+ csr_write(CSR_VSSCRATCH, csr->vsscratch);
+ csr_write(CSR_VSEPC, csr->vsepc);
+ csr_write(CSR_VSCAUSE, csr->vscause);
+ csr_write(CSR_VSTVAL, csr->vstval);
+ csr_write(CSR_HEDELEG, cfg->hedeleg);
+ csr_write(CSR_HVIP, csr->hvip);
+ csr_write(CSR_VSATP, csr->vsatp);
+ csr_write(CSR_HENVCFG, cfg->henvcfg);
if (IS_ENABLED(CONFIG_32BIT))
- csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
+ csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32);
+ if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) {
+ csr_write(CSR_HSTATEEN0, cfg->hstateen0);
+ if (IS_ENABLED(CONFIG_32BIT))
+ csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32);
+ }
}
kvm_riscv_gstage_update_hgatp(vcpu);
@@ -550,6 +650,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ void *nsh;
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
vcpu->cpu = -1;
@@ -565,15 +666,28 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->arch.isa);
kvm_riscv_vcpu_host_vector_restore(&vcpu->arch.host_context);
- csr->vsstatus = csr_read(CSR_VSSTATUS);
- csr->vsie = csr_read(CSR_VSIE);
- csr->vstvec = csr_read(CSR_VSTVEC);
- csr->vsscratch = csr_read(CSR_VSSCRATCH);
- csr->vsepc = csr_read(CSR_VSEPC);
- csr->vscause = csr_read(CSR_VSCAUSE);
- csr->vstval = csr_read(CSR_VSTVAL);
- csr->hvip = csr_read(CSR_HVIP);
- csr->vsatp = csr_read(CSR_VSATP);
+ if (kvm_riscv_nacl_available()) {
+ nsh = nacl_shmem();
+ csr->vsstatus = nacl_csr_read(nsh, CSR_VSSTATUS);
+ csr->vsie = nacl_csr_read(nsh, CSR_VSIE);
+ csr->vstvec = nacl_csr_read(nsh, CSR_VSTVEC);
+ csr->vsscratch = nacl_csr_read(nsh, CSR_VSSCRATCH);
+ csr->vsepc = nacl_csr_read(nsh, CSR_VSEPC);
+ csr->vscause = nacl_csr_read(nsh, CSR_VSCAUSE);
+ csr->vstval = nacl_csr_read(nsh, CSR_VSTVAL);
+ csr->hvip = nacl_csr_read(nsh, CSR_HVIP);
+ csr->vsatp = nacl_csr_read(nsh, CSR_VSATP);
+ } else {
+ csr->vsstatus = csr_read(CSR_VSSTATUS);
+ csr->vsie = csr_read(CSR_VSIE);
+ csr->vstvec = csr_read(CSR_VSTVEC);
+ csr->vsscratch = csr_read(CSR_VSSCRATCH);
+ csr->vsepc = csr_read(CSR_VSEPC);
+ csr->vscause = csr_read(CSR_VSCAUSE);
+ csr->vstval = csr_read(CSR_VSTVAL);
+ csr->hvip = csr_read(CSR_HVIP);
+ csr->vsatp = csr_read(CSR_VSATP);
+ }
}
static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
@@ -584,11 +698,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait,
- (!vcpu->arch.power_off) && (!vcpu->arch.pause),
+ (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
kvm_vcpu_srcu_read_lock(vcpu);
- if (vcpu->arch.power_off || vcpu->arch.pause) {
+ if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) {
/*
* Awaken to handle a signal, request to
* sleep again later.
@@ -598,7 +712,7 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu))
- kvm_riscv_reset_vcpu(vcpu);
+ kvm_riscv_reset_vcpu(vcpu, true);
if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
kvm_riscv_gstage_update_hgatp(vcpu);
@@ -628,7 +742,7 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
- csr_write(CSR_HVIP, csr->hvip);
+ ncsr_write(CSR_HVIP, csr->hvip);
kvm_riscv_vcpu_aia_update_hvip(vcpu);
}
@@ -638,6 +752,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_guest_state(struct kvm_vcpu *
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
+ vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren);
vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
(cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
@@ -651,6 +766,7 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
struct kvm_vcpu_config *cfg = &vcpu->arch.cfg;
+ csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren);
csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg);
if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) &&
(cfg->hstateen0 & SMSTATEEN0_SSTATEEN0))
@@ -665,11 +781,81 @@ static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *v
* This must be noinstr as instrumentation may make use of RCU, and this is not
* safe during the EQS.
*/
-static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+ struct kvm_cpu_trap *trap)
{
+ void *nsh;
+ struct kvm_cpu_context *gcntx = &vcpu->arch.guest_context;
+ struct kvm_cpu_context *hcntx = &vcpu->arch.host_context;
+
+ /*
+ * We save trap CSRs (such as SEPC, SCAUSE, STVAL, HTVAL, and
+ * HTINST) here because we do local_irq_enable() after this
+ * function in kvm_arch_vcpu_ioctl_run() which can result in
+ * an interrupt immediately after local_irq_enable() and can
+ * potentially change trap CSRs.
+ */
+
kvm_riscv_vcpu_swap_in_guest_state(vcpu);
guest_state_enter_irqoff();
- __kvm_riscv_switch_to(&vcpu->arch);
+
+ if (kvm_riscv_nacl_sync_sret_available()) {
+ nsh = nacl_shmem();
+
+ if (kvm_riscv_nacl_autoswap_csr_available()) {
+ hcntx->hstatus =
+ nacl_csr_read(nsh, CSR_HSTATUS);
+ nacl_scratch_write_long(nsh,
+ SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
+ SBI_NACL_SHMEM_AUTOSWAP_HSTATUS,
+ gcntx->hstatus);
+ nacl_scratch_write_long(nsh,
+ SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
+ SBI_NACL_SHMEM_AUTOSWAP_FLAG_HSTATUS);
+ } else if (kvm_riscv_nacl_sync_csr_available()) {
+ hcntx->hstatus = nacl_csr_swap(nsh,
+ CSR_HSTATUS, gcntx->hstatus);
+ } else {
+ hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
+ }
+
+ nacl_scratch_write_longs(nsh,
+ SBI_NACL_SHMEM_SRET_OFFSET +
+ SBI_NACL_SHMEM_SRET_X(1),
+ &gcntx->ra,
+ SBI_NACL_SHMEM_SRET_X_LAST);
+
+ __kvm_riscv_nacl_switch_to(&vcpu->arch, SBI_EXT_NACL,
+ SBI_EXT_NACL_SYNC_SRET);
+
+ if (kvm_riscv_nacl_autoswap_csr_available()) {
+ nacl_scratch_write_long(nsh,
+ SBI_NACL_SHMEM_AUTOSWAP_OFFSET,
+ 0);
+ gcntx->hstatus = nacl_scratch_read_long(nsh,
+ SBI_NACL_SHMEM_AUTOSWAP_OFFSET +
+ SBI_NACL_SHMEM_AUTOSWAP_HSTATUS);
+ } else {
+ gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
+ }
+
+ trap->htval = nacl_csr_read(nsh, CSR_HTVAL);
+ trap->htinst = nacl_csr_read(nsh, CSR_HTINST);
+ } else {
+ hcntx->hstatus = csr_swap(CSR_HSTATUS, gcntx->hstatus);
+
+ __kvm_riscv_switch_to(&vcpu->arch);
+
+ gcntx->hstatus = csr_swap(CSR_HSTATUS, hcntx->hstatus);
+
+ trap->htval = csr_read(CSR_HTVAL);
+ trap->htinst = csr_read(CSR_HTINST);
+ }
+
+ trap->sepc = gcntx->sepc;
+ trap->scause = csr_read(CSR_SCAUSE);
+ trap->stval = csr_read(CSR_STVAL);
+
vcpu->arch.last_exit_cpu = vcpu->cpu;
guest_state_exit_irqoff();
kvm_riscv_vcpu_swap_in_host_state(vcpu);
@@ -711,7 +897,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
return ret;
}
- if (run->immediate_exit) {
+ if (!vcpu->wants_to_run) {
kvm_vcpu_srcu_read_unlock(vcpu);
return -EINTR;
}
@@ -782,24 +968,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
kvm_riscv_local_tlb_sanitize(vcpu);
+ trace_kvm_entry(vcpu);
+
guest_timing_enter_irqoff();
- kvm_riscv_vcpu_enter_exit(vcpu);
+ kvm_riscv_vcpu_enter_exit(vcpu, &trap);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
- /*
- * Save SCAUSE, STVAL, HTVAL, and HTINST because we might
- * get an interrupt between __kvm_riscv_switch_to() and
- * local_irq_enable() which can potentially change CSRs.
- */
- trap.sepc = vcpu->arch.guest_context.sepc;
- trap.scause = csr_read(CSR_SCAUSE);
- trap.stval = csr_read(CSR_STVAL);
- trap.htval = csr_read(CSR_HTVAL);
- trap.htinst = csr_read(CSR_HTINST);
-
/* Syncup interrupts state with HW */
kvm_riscv_vcpu_sync_interrupts(vcpu);
@@ -820,6 +997,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
local_irq_enable();
+ trace_kvm_exit(&trap);
+
preempt_enable();
kvm_vcpu_srcu_read_lock(vcpu);
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 2415722c01b8..6e0c18412795 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -165,6 +165,17 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
vcpu->arch.guest_context.sstatus |= SR_SPP;
}
+static inline int vcpu_redirect(struct kvm_vcpu *vcpu, struct kvm_cpu_trap *trap)
+{
+ int ret = -EFAULT;
+
+ if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) {
+ kvm_riscv_vcpu_trap_redirect(vcpu, trap);
+ ret = 1;
+ }
+ return ret;
+}
+
/*
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
* proper exit to userspace.
@@ -183,12 +194,32 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
run->exit_reason = KVM_EXIT_UNKNOWN;
switch (trap->scause) {
case EXC_INST_ILLEGAL:
+ kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ILLEGAL_INSN);
+ vcpu->stat.instr_illegal_exits++;
+ ret = vcpu_redirect(vcpu, trap);
+ break;
case EXC_LOAD_MISALIGNED:
+ kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_LOAD);
+ vcpu->stat.load_misaligned_exits++;
+ ret = vcpu_redirect(vcpu, trap);
+ break;
case EXC_STORE_MISALIGNED:
- if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) {
- kvm_riscv_vcpu_trap_redirect(vcpu, trap);
- ret = 1;
- }
+ kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_MISALIGNED_STORE);
+ vcpu->stat.store_misaligned_exits++;
+ ret = vcpu_redirect(vcpu, trap);
+ break;
+ case EXC_LOAD_ACCESS:
+ kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_LOAD);
+ vcpu->stat.load_access_exits++;
+ ret = vcpu_redirect(vcpu, trap);
+ break;
+ case EXC_STORE_ACCESS:
+ kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_ACCESS_STORE);
+ vcpu->stat.store_access_exits++;
+ ret = vcpu_redirect(vcpu, trap);
+ break;
+ case EXC_INST_ACCESS:
+ ret = vcpu_redirect(vcpu, trap);
break;
case EXC_VIRTUAL_INST_FAULT:
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
@@ -204,6 +235,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run);
break;
+ case EXC_BREAKPOINT:
+ run->exit_reason = KVM_EXIT_DEBUG;
+ ret = 0;
+ break;
default:
break;
}
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c
index ee7215f4071f..97dec18e6989 100644
--- a/arch/riscv/kvm/vcpu_insn.c
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -16,6 +16,9 @@
#define INSN_MASK_WFI 0xffffffff
#define INSN_MATCH_WFI 0x10500073
+#define INSN_MASK_WRS 0xffffffff
+#define INSN_MATCH_WRS 0x00d00073
+
#define INSN_MATCH_CSRRW 0x1073
#define INSN_MASK_CSRRW 0x707f
#define INSN_MATCH_CSRRS 0x2073
@@ -203,6 +206,13 @@ static int wfi_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
return KVM_INSN_CONTINUE_NEXT_SEPC;
}
+static int wrs_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
+{
+ vcpu->stat.wrs_exit_stat++;
+ kvm_vcpu_on_spin(vcpu, vcpu->arch.guest_context.sstatus & SR_SPP);
+ return KVM_INSN_CONTINUE_NEXT_SEPC;
+}
+
struct csr_func {
unsigned int base;
unsigned int count;
@@ -378,6 +388,11 @@ static const struct insn_func system_opcode_funcs[] = {
.match = INSN_MATCH_WFI,
.func = wfi_insn,
},
+ {
+ .mask = INSN_MASK_WRS,
+ .match = INSN_MATCH_WRS,
+ .func = wrs_insn,
+ },
};
static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index f4a6124d25c9..2e1b646f0d61 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -15,6 +15,7 @@
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/kvm_vcpu_vector.h>
+#include <asm/pgtable.h>
#include <asm/vector.h>
#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
@@ -34,13 +35,23 @@ static const unsigned long kvm_isa_ext_arr[] = {
[KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
[KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
/* Multi letter extensions (alphabetically sorted) */
+ [KVM_RISCV_ISA_EXT_SMNPM] = RISCV_ISA_EXT_SSNPM,
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
+ KVM_ISA_EXT_ARR(SSNPM),
KVM_ISA_EXT_ARR(SSTC),
+ KVM_ISA_EXT_ARR(SVADE),
+ KVM_ISA_EXT_ARR(SVADU),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
+ KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALRSC),
+ KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
KVM_ISA_EXT_ARR(ZBC),
@@ -48,11 +59,17 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(ZBKC),
KVM_ISA_EXT_ARR(ZBKX),
KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZCA),
+ KVM_ISA_EXT_ARR(ZCB),
+ KVM_ISA_EXT_ARR(ZCD),
+ KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
KVM_ISA_EXT_ARR(ZFH),
KVM_ISA_EXT_ARR(ZFHMIN),
KVM_ISA_EXT_ARR(ZICBOM),
KVM_ISA_EXT_ARR(ZICBOZ),
+ KVM_ISA_EXT_ARR(ZICCRSE),
KVM_ISA_EXT_ARR(ZICNTR),
KVM_ISA_EXT_ARR(ZICOND),
KVM_ISA_EXT_ARR(ZICSR),
@@ -60,6 +77,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
KVM_ISA_EXT_ARR(ZKNH),
@@ -99,6 +117,15 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
switch (ext) {
case KVM_RISCV_ISA_EXT_H:
return false;
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
+ /* Sscofpmf depends on interrupt filtering defined in ssaia */
+ return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA);
+ case KVM_RISCV_ISA_EXT_SVADU:
+ /*
+ * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
+ * Guest OS can use Svadu only when host OS enable Svadu.
+ */
+ return arch_has_hw_pte_young();
case KVM_RISCV_ISA_EXT_V:
return riscv_v_vstate_ctrl_user_allowed();
default:
@@ -116,10 +143,19 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
+ case KVM_RISCV_ISA_EXT_SMNPM:
+ /* There is not architectural config bit to disable sscofpmf completely */
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
+ case KVM_RISCV_ISA_EXT_SSNPM:
case KVM_RISCV_ISA_EXT_SSTC:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:
+ case KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_RISCV_ISA_EXT_ZAAMO:
+ case KVM_RISCV_ISA_EXT_ZABHA:
case KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_RISCV_ISA_EXT_ZALRSC:
+ case KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_RISCV_ISA_EXT_ZBA:
case KVM_RISCV_ISA_EXT_ZBB:
case KVM_RISCV_ISA_EXT_ZBC:
@@ -127,9 +163,15 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_ZBKC:
case KVM_RISCV_ISA_EXT_ZBKX:
case KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_RISCV_ISA_EXT_ZCA:
+ case KVM_RISCV_ISA_EXT_ZCB:
+ case KVM_RISCV_ISA_EXT_ZCD:
+ case KVM_RISCV_ISA_EXT_ZCF:
+ case KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_RISCV_ISA_EXT_ZFA:
case KVM_RISCV_ISA_EXT_ZFH:
case KVM_RISCV_ISA_EXT_ZFHMIN:
+ case KVM_RISCV_ISA_EXT_ZICCRSE:
case KVM_RISCV_ISA_EXT_ZICNTR:
case KVM_RISCV_ISA_EXT_ZICOND:
case KVM_RISCV_ISA_EXT_ZICSR:
@@ -137,6 +179,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_RISCV_ISA_EXT_ZIMOP:
case KVM_RISCV_ISA_EXT_ZKND:
case KVM_RISCV_ISA_EXT_ZKNE:
case KVM_RISCV_ISA_EXT_ZKNH:
@@ -161,6 +204,12 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
/* Extensions which can be disabled using Smstateen */
case KVM_RISCV_ISA_EXT_SSAIA:
return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN);
+ case KVM_RISCV_ISA_EXT_SVADE:
+ /*
+ * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero.
+ * Svade can't be disabled unless we support Svadu.
+ */
+ return arch_has_hw_pte_young();
default:
break;
}
@@ -718,9 +767,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
switch (reg_subtype) {
case KVM_REG_RISCV_ISA_SINGLE:
return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
- case KVM_REG_RISCV_SBI_MULTI_EN:
+ case KVM_REG_RISCV_ISA_MULTI_EN:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
- case KVM_REG_RISCV_SBI_MULTI_DIS:
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
default:
return -ENOENT;
@@ -986,7 +1035,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
{
- return copy_isa_ext_reg_indices(vcpu, NULL);;
+ return copy_isa_ext_reg_indices(vcpu, NULL);
}
static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 86391a5061dd..78ac3216a54d 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -14,6 +14,7 @@
#include <asm/csr.h>
#include <asm/kvm_vcpu_sbi.h>
#include <asm/kvm_vcpu_pmu.h>
+#include <asm/sbi.h>
#include <linux/bitops.h>
#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
@@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
u64 sample_period;
if (!pmc->counter_val)
- sample_period = counter_val_mask + 1;
+ sample_period = counter_val_mask;
else
sample_period = (-pmc->counter_val) & counter_val_mask;
@@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
}
+static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ unsigned long *out_val)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc;
+ int fevent_code;
+
+ if (!IS_ENABLED(CONFIG_32BIT)) {
+ pr_warn("%s: should be invoked for only RV32\n", __func__);
+ return -EINVAL;
+ }
+
+ if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
+ pr_warn("Invalid counter id [%ld]during read\n", cidx);
+ return -EINVAL;
+ }
+
+ pmc = &kvpmu->pmc[cidx];
+
+ if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW)
+ return -EINVAL;
+
+ fevent_code = get_event_code(pmc->event_idx);
+ pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+
+ *out_val = pmc->counter_val >> 32;
+
+ return 0;
+}
+
static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
unsigned long *out_val)
{
@@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
u64 enabled, running;
int fevent_code;
+ if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) {
+ pr_warn("Invalid counter id [%ld] during read\n", cidx);
+ return -EINVAL;
+ }
+
pmc = &kvpmu->pmc[cidx];
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
@@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
return 0;
}
-static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
- unsigned long flags, unsigned long eidx, unsigned long evtdata)
+static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct kvm_vcpu *vcpu = pmc->vcpu;
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
+ u64 period;
+
+ /*
+ * Stop the event counting by directly accessing the perf_event.
+ * Otherwise, this needs to deferred via a workqueue.
+ * That will introduce skew in the counter value because the actual
+ * physical counter would start after returning from this function.
+ * It will be stopped again once the workqueue is scheduled
+ */
+ rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * The hw counter would start automatically when this function returns.
+ * Thus, the host may continue to interrupt and inject it to the guest
+ * even without the guest configuring the next event. Depending on the hardware
+ * the host may have some sluggishness only if privilege mode filtering is not
+ * available. In an ideal world, where qemu is not the only capable hardware,
+ * this can be removed.
+ * FYI: ARM64 does this way while x86 doesn't do anything as such.
+ * TODO: Should we keep it for RISC-V ?
+ */
+ period = -(local64_read(&perf_event->count));
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
+
+ set_bit(pmc->idx, kvpmu->pmc_overflown);
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
+
+ rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
+}
+
+static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
+ unsigned long flags, unsigned long eidx,
+ unsigned long evtdata)
{
struct perf_event *event;
@@ -247,9 +325,9 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr
*/
attr->sample_period = kvm_pmu_get_sample_period(pmc);
- event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
+ event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
- pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
+ pr_debug("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
}
@@ -310,6 +388,70 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
return ret;
}
+static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+
+ kfree(kvpmu->sdata);
+ kvpmu->sdata = NULL;
+ kvpmu->snapshot_addr = INVALID_GPA;
+}
+
+int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low,
+ unsigned long saddr_high, unsigned long flags,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data);
+ int sbiret = 0;
+ gpa_t saddr;
+ unsigned long hva;
+ bool writable;
+
+ if (!kvpmu || flags) {
+ sbiret = SBI_ERR_INVALID_PARAM;
+ goto out;
+ }
+
+ if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) {
+ kvm_pmu_clear_snapshot_area(vcpu);
+ return 0;
+ }
+
+ saddr = saddr_low;
+
+ if (saddr_high != 0) {
+ if (IS_ENABLED(CONFIG_32BIT))
+ saddr |= ((gpa_t)saddr_high << 32);
+ else
+ sbiret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+
+ hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable);
+ if (kvm_is_error_hva(hva) || !writable) {
+ sbiret = SBI_ERR_INVALID_ADDRESS;
+ goto out;
+ }
+
+ kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC);
+ if (!kvpmu->sdata)
+ return -ENOMEM;
+
+ if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) {
+ kfree(kvpmu->sdata);
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
+
+ kvpmu->snapshot_addr = saddr;
+
+out:
+ retdata->err_val = sbiret;
+
+ return 0;
+}
+
int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
struct kvm_vcpu_sbi_return *retdata)
{
@@ -343,20 +485,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
int i, pmc_index, sbiret = 0;
struct kvm_pmc *pmc;
int fevent_code;
+ bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
+ if (snap_flag_set) {
+ if (kvpmu->snapshot_addr == INVALID_GPA) {
+ sbiret = SBI_ERR_NO_SHMEM;
+ goto out;
+ }
+ if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+ sizeof(struct riscv_pmu_snapshot_data))) {
+ pr_warn("Unable to read snapshot shared memory while starting counters\n");
+ sbiret = SBI_ERR_FAILURE;
+ goto out;
+ }
+ }
/* Start the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
if (!test_bit(pmc_index, kvpmu->pmc_in_use))
continue;
+ /* The guest started the counter again. Reset the overflow status */
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
pmc = &kvpmu->pmc[pmc_index];
- if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
+ if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) {
pmc->counter_val = ival;
+ } else if (snap_flag_set) {
+ /* The counter index in the snapshot are relative to the counter base */
+ pmc->counter_val = kvpmu->sdata->ctr_values[i];
+ }
+
if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
fevent_code = get_event_code(pmc->event_idx);
if (fevent_code >= SBI_PMU_FW_MAX) {
@@ -400,12 +562,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
u64 enabled, running;
struct kvm_pmc *pmc;
int fevent_code;
+ bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
+ bool shmem_needs_update = false;
if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
sbiret = SBI_ERR_INVALID_PARAM;
goto out;
}
+ if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) {
+ sbiret = SBI_ERR_NO_SHMEM;
+ goto out;
+ }
+
/* Stop the counters that have been configured and requested by the guest */
for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
pmc_index = i + ctr_base;
@@ -432,21 +601,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
sbiret = SBI_ERR_ALREADY_STOPPED;
}
- if (flags & SBI_PMU_STOP_FLAG_RESET) {
- /* Relase the counter if this is a reset request */
- pmc->counter_val += perf_event_read_value(pmc->perf_event,
- &enabled, &running);
+ if (flags & SBI_PMU_STOP_FLAG_RESET)
+ /* Release the counter if this is a reset request */
kvm_pmu_release_perf_event(pmc);
- }
} else {
sbiret = SBI_ERR_INVALID_PARAM;
}
+
+ if (snap_flag_set && !sbiret) {
+ if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW)
+ pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+ else if (pmc->perf_event)
+ pmc->counter_val += perf_event_read_value(pmc->perf_event,
+ &enabled, &running);
+ /*
+ * The counter and overflow indicies in the snapshot region are w.r.to
+ * cbase. Modify the set bit in the counter mask instead of the pmc_index
+ * which indicates the absolute counter index.
+ */
+ if (test_bit(pmc_index, kvpmu->pmc_overflown))
+ kvpmu->sdata->ctr_overflow_mask |= BIT(i);
+ kvpmu->sdata->ctr_values[i] = pmc->counter_val;
+ shmem_needs_update = true;
+ }
+
if (flags & SBI_PMU_STOP_FLAG_RESET) {
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
clear_bit(pmc_index, kvpmu->pmc_in_use);
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
+ if (snap_flag_set) {
+ /*
+ * Only clear the given counter as the caller is responsible to
+ * validate both the overflow mask and configured counters.
+ */
+ kvpmu->sdata->ctr_overflow_mask &= ~BIT(i);
+ shmem_needs_update = true;
+ }
}
}
+ if (shmem_needs_update)
+ kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
+ sizeof(struct riscv_pmu_snapshot_data));
+
out:
retdata->err_val = sbiret;
@@ -458,7 +655,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
unsigned long eidx, u64 evtdata,
struct kvm_vcpu_sbi_return *retdata)
{
- int ctr_idx, ret, sbiret = 0;
+ int ctr_idx, sbiret = 0;
+ long ret;
bool is_fevent;
unsigned long event_code;
u32 etype = kvm_pmu_get_perf_event_type(eidx);
@@ -468,6 +666,7 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
.type = etype,
.size = sizeof(struct perf_event_attr),
.pinned = true,
+ .disabled = true,
/*
* It should never reach here if the platform doesn't support the sscofpmf
* extension as mode filtering won't work without it.
@@ -517,8 +716,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
kvpmu->fw_event[event_code].started = true;
} else {
ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
- if (ret)
- return ret;
+ if (ret) {
+ sbiret = SBI_ERR_NOT_SUPPORTED;
+ goto out;
+ }
}
set_bit(ctr_idx, kvpmu->pmc_in_use);
@@ -530,7 +731,19 @@ out:
return 0;
}
-int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
+int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ int ret;
+
+ ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val);
+ if (ret == -EINVAL)
+ retdata->err_val = SBI_ERR_INVALID_PARAM;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
struct kvm_vcpu_sbi_return *retdata)
{
int ret;
@@ -566,6 +779,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+ kvpmu->snapshot_addr = INVALID_GPA;
if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
@@ -585,6 +799,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc = &kvpmu->pmc[i];
pmc->idx = i;
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
+ pmc->vcpu = vcpu;
if (i < kvpmu->num_hw_ctrs) {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
if (i < 3)
@@ -601,7 +816,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc->cinfo.csr = CSR_CYCLE + i;
} else {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
- pmc->cinfo.width = BITS_PER_LONG - 1;
+ pmc->cinfo.width = 63;
}
}
@@ -617,14 +832,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
if (!kvpmu)
return;
- for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
+ for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
pmc = &kvpmu->pmc[i];
pmc->counter_val = 0;
kvm_pmu_release_perf_event(pmc);
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
}
- bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
+ kvm_pmu_clear_snapshot_area(vcpu);
}
void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 72a2ffb8dcd1..6e09b518a5d1 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -71,6 +71,10 @@ static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = {
.ext_ptr = &vcpu_sbi_ext_dbcn,
},
{
+ .ext_idx = KVM_RISCV_SBI_EXT_SUSP,
+ .ext_ptr = &vcpu_sbi_ext_susp,
+ },
+ {
.ext_idx = KVM_RISCV_SBI_EXT_STA,
.ext_ptr = &vcpu_sbi_ext_sta,
},
@@ -127,8 +131,8 @@ void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->riscv_sbi.args[3] = cp->a3;
run->riscv_sbi.args[4] = cp->a4;
run->riscv_sbi.args[5] = cp->a5;
- run->riscv_sbi.ret[0] = cp->a0;
- run->riscv_sbi.ret[1] = cp->a1;
+ run->riscv_sbi.ret[0] = SBI_ERR_NOT_SUPPORTED;
+ run->riscv_sbi.ret[1] = 0;
}
void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
@@ -138,8 +142,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
unsigned long i;
struct kvm_vcpu *tmp;
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ spin_lock(&tmp->arch.mp_state_lock);
+ WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED);
+ spin_unlock(&tmp->arch.mp_state_lock);
+ }
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
memset(&run->system_event, 0, sizeof(run->system_event));
@@ -149,6 +156,34 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
}
+void kvm_riscv_vcpu_sbi_request_reset(struct kvm_vcpu *vcpu,
+ unsigned long pc, unsigned long a1)
+{
+ spin_lock(&vcpu->arch.reset_state.lock);
+ vcpu->arch.reset_state.pc = pc;
+ vcpu->arch.reset_state.a1 = a1;
+ spin_unlock(&vcpu->arch.reset_state.lock);
+
+ kvm_make_request(KVM_REQ_VCPU_RESET, vcpu);
+}
+
+void kvm_riscv_vcpu_sbi_load_reset_state(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ struct kvm_vcpu_reset_state *reset_state = &vcpu->arch.reset_state;
+
+ cntx->a0 = vcpu->vcpu_id;
+
+ spin_lock(&vcpu->arch.reset_state.lock);
+ cntx->sepc = reset_state->pc;
+ cntx->a1 = reset_state->a1;
+ spin_unlock(&vcpu->arch.reset_state.lock);
+
+ cntx->sstatus &= ~SR_SIE;
+ csr->vsatp = 0;
+}
+
int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -483,19 +518,22 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu)
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
const struct kvm_riscv_sbi_extension_entry *entry;
const struct kvm_vcpu_sbi_extension *ext;
- int i;
+ int idx, i;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
entry = &sbi_ext[i];
ext = entry->ext_ptr;
+ idx = entry->ext_idx;
+
+ if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status))
+ continue;
if (ext->probe && !ext->probe(vcpu)) {
- scontext->ext_status[entry->ext_idx] =
- KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
+ scontext->ext_status[idx] = KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE;
continue;
}
- scontext->ext_status[entry->ext_idx] = ext->default_disabled ?
+ scontext->ext_status[idx] = ext->default_disabled ?
KVM_RISCV_SBI_EXT_STATUS_DISABLED :
KVM_RISCV_SBI_EXT_STATUS_ENABLED;
}
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
index 7dca0e9381d9..f26207f84bab 100644
--- a/arch/riscv/kvm/vcpu_sbi_hsm.c
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -9,44 +9,55 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
+#include <linux/wordpart.h>
#include <asm/sbi.h>
#include <asm/kvm_vcpu_sbi.h>
static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *reset_cntx;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
struct kvm_vcpu *target_vcpu;
unsigned long target_vcpuid = cp->a0;
+ int ret = 0;
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
- if (!target_vcpu->arch.power_off)
- return SBI_ERR_ALREADY_AVAILABLE;
- reset_cntx = &target_vcpu->arch.guest_reset_context;
- /* start address */
- reset_cntx->sepc = cp->a1;
- /* target vcpu id to start */
- reset_cntx->a0 = target_vcpuid;
- /* private data passed from kernel */
- reset_cntx->a1 = cp->a2;
- kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
+ spin_lock(&target_vcpu->arch.mp_state_lock);
+
+ if (!kvm_riscv_vcpu_stopped(target_vcpu)) {
+ ret = SBI_ERR_ALREADY_AVAILABLE;
+ goto out;
+ }
- kvm_riscv_vcpu_power_on(target_vcpu);
+ kvm_riscv_vcpu_sbi_request_reset(target_vcpu, cp->a1, cp->a2);
- return 0;
+ __kvm_riscv_vcpu_power_on(target_vcpu);
+
+out:
+ spin_unlock(&target_vcpu->arch.mp_state_lock);
+
+ return ret;
}
static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.power_off)
- return SBI_ERR_FAILURE;
+ int ret = 0;
- kvm_riscv_vcpu_power_off(vcpu);
+ spin_lock(&vcpu->arch.mp_state_lock);
- return 0;
+ if (kvm_riscv_vcpu_stopped(vcpu)) {
+ ret = SBI_ERR_FAILURE;
+ goto out;
+ }
+
+ __kvm_riscv_vcpu_power_off(vcpu);
+
+out:
+ spin_unlock(&vcpu->arch.mp_state_lock);
+
+ return ret;
}
static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
@@ -58,12 +69,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
if (!target_vcpu)
return SBI_ERR_INVALID_PARAM;
- if (!target_vcpu->arch.power_off)
- return SBI_HSM_STATE_STARTED;
- else if (vcpu->stat.generic.blocking)
+ if (kvm_riscv_vcpu_stopped(target_vcpu))
+ return SBI_HSM_STATE_STOPPED;
+ else if (target_vcpu->stat.generic.blocking)
return SBI_HSM_STATE_SUSPENDED;
else
- return SBI_HSM_STATE_STOPPED;
+ return SBI_HSM_STATE_STARTED;
}
static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
@@ -71,14 +82,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
{
int ret = 0;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
- struct kvm *kvm = vcpu->kvm;
unsigned long funcid = cp->a6;
switch (funcid) {
case SBI_EXT_HSM_HART_START:
- mutex_lock(&kvm->lock);
ret = kvm_sbi_hsm_vcpu_start(vcpu);
- mutex_unlock(&kvm->lock);
break;
case SBI_EXT_HSM_HART_STOP:
ret = kvm_sbi_hsm_vcpu_stop(vcpu);
@@ -91,7 +99,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
}
return 0;
case SBI_EXT_HSM_HART_SUSPEND:
- switch (cp->a0) {
+ switch (lower_32_bits(cp->a0)) {
case SBI_HSM_SUSPEND_RET_DEFAULT:
kvm_riscv_vcpu_wfi(vcpu);
break;
diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
index 7eca72df2cbd..e4be34e03e83 100644
--- a/arch/riscv/kvm/vcpu_sbi_pmu.c
+++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
@@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
#endif
/*
* This can fail if perf core framework fails to create an event.
- * Forward the error to userspace because it's an error which
- * happened within the host kernel. The other option would be
- * to convert to an SBI error and forward to the guest.
+ * No need to forward the error to userspace and exit the guest.
+ * The operation can continue without profiling. Forward the
+ * appropriate SBI error to the guest.
*/
ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1,
cp->a2, cp->a3, temp, retdata);
@@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
case SBI_EXT_PMU_COUNTER_FW_READ:
- ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata);
+ ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata);
+ break;
+ case SBI_EXT_PMU_COUNTER_FW_READ_HI:
+ if (IS_ENABLED(CONFIG_32BIT))
+ ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata);
+ else
+ retdata->out_val = 0;
+ break;
+ case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM:
+ ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata);
break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index 9c2ab3dfa93a..b17fad091bab 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
u64 next_cycle;
if (cp->a6 != SBI_EXT_TIME_SET_TIMER) {
- retdata->err_val = SBI_ERR_INVALID_PARAM;
+ retdata->err_val = SBI_ERR_NOT_SUPPORTED;
return 0;
}
@@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
unsigned long hmask = cp->a0;
unsigned long hbase = cp->a1;
+ unsigned long hart_bit = 0, sentmask = 0;
if (cp->a6 != SBI_EXT_IPI_SEND_IPI) {
- retdata->err_val = SBI_ERR_INVALID_PARAM;
+ retdata->err_val = SBI_ERR_NOT_SUPPORTED;
return 0;
}
@@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (hbase != -1UL) {
if (tmp->vcpu_id < hbase)
continue;
- if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+ hart_bit = tmp->vcpu_id - hbase;
+ if (hart_bit >= __riscv_xlen)
+ goto done;
+ if (!(hmask & (1UL << hart_bit)))
continue;
}
ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
if (ret < 0)
break;
+ sentmask |= 1UL << hart_bit;
kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD);
}
+done:
+ if (hbase != -1UL && (hmask ^ sentmask))
+ retdata->err_val = SBI_ERR_INVALID_PARAM;
+
return ret;
}
@@ -94,7 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
else
kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
@@ -102,7 +111,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT);
break;
case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
- if (cp->a2 == 0 && cp->a3 == 0)
+ if ((cp->a2 == 0 && cp->a3 == 0) || cp->a3 == -1UL)
kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
hbase, hmask, cp->a4);
else
@@ -118,9 +127,9 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
/*
* Until nested virtualization is implemented, the
- * SBI HFENCE calls should be treated as NOPs
+ * SBI HFENCE calls should return not supported
+ * hence fallthrough.
*/
- break;
default:
retdata->err_val = SBI_ERR_NOT_SUPPORTED;
}
diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c
index d8cf9ca28c61..5f35427114c1 100644
--- a/arch/riscv/kvm/vcpu_sbi_sta.c
+++ b/arch/riscv/kvm/vcpu_sbi_sta.c
@@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu)
if (flags != 0)
return SBI_ERR_INVALID_PARAM;
- if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE &&
- shmem_phys_hi == SBI_STA_SHMEM_DISABLE) {
+ if (shmem_phys_lo == SBI_SHMEM_DISABLE &&
+ shmem_phys_hi == SBI_SHMEM_DISABLE) {
vcpu->arch.sta.shmem = INVALID_GPA;
return 0;
}
diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c
new file mode 100644
index 000000000000..359be90b0fc5
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_system.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 Ventana Micro Systems Inc.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/wordpart.h>
+
+#include <asm/kvm_vcpu_sbi.h>
+#include <asm/sbi.h>
+
+static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_vcpu_sbi_return *retdata)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long funcid = cp->a6;
+ unsigned long hva, i;
+ struct kvm_vcpu *tmp;
+
+ switch (funcid) {
+ case SBI_EXT_SUSP_SYSTEM_SUSPEND:
+ if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) {
+ retdata->err_val = SBI_ERR_INVALID_PARAM;
+ return 0;
+ }
+
+ if (!(cp->sstatus & SR_SPP)) {
+ retdata->err_val = SBI_ERR_FAILURE;
+ return 0;
+ }
+
+ hva = kvm_vcpu_gfn_to_hva_prot(vcpu, cp->a1 >> PAGE_SHIFT, NULL);
+ if (kvm_is_error_hva(hva)) {
+ retdata->err_val = SBI_ERR_INVALID_ADDRESS;
+ return 0;
+ }
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (tmp == vcpu)
+ continue;
+ if (!kvm_riscv_vcpu_stopped(tmp)) {
+ retdata->err_val = SBI_ERR_DENIED;
+ return 0;
+ }
+ }
+
+ kvm_riscv_vcpu_sbi_request_reset(vcpu, cp->a1, cp->a2);
+
+ /* userspace provides the suspend implementation */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ retdata->uexit = true;
+ break;
+ default:
+ retdata->err_val = SBI_ERR_NOT_SUPPORTED;
+ break;
+ }
+
+ return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_susp = {
+ .extid_start = SBI_EXT_SUSP,
+ .extid_end = SBI_EXT_SUSP,
+ .default_disabled = true,
+ .handler = kvm_sbi_ext_susp_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
index 0c26189aa01c..47686bcb21e0 100644
--- a/arch/riscv/kvm/vcpu_switch.S
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -11,11 +11,7 @@
#include <asm/asm-offsets.h>
#include <asm/csr.h>
- .text
- .altmacro
- .option norelax
-
-SYM_FUNC_START(__kvm_riscv_switch_to)
+.macro SAVE_HOST_GPRS
/* Save Host GPRs (except A0 and T0-T6) */
REG_S ra, (KVM_ARCH_HOST_RA)(a0)
REG_S sp, (KVM_ARCH_HOST_SP)(a0)
@@ -40,39 +36,33 @@ SYM_FUNC_START(__kvm_riscv_switch_to)
REG_S s9, (KVM_ARCH_HOST_S9)(a0)
REG_S s10, (KVM_ARCH_HOST_S10)(a0)
REG_S s11, (KVM_ARCH_HOST_S11)(a0)
+.endm
+.macro SAVE_HOST_AND_RESTORE_GUEST_CSRS __resume_addr
/* Load Guest CSR values */
REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
- REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
- REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
- la t4, .Lkvm_switch_return
- REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0)
+ la t1, \__resume_addr
+ REG_L t2, (KVM_ARCH_GUEST_SEPC)(a0)
/* Save Host and Restore Guest SSTATUS */
csrrw t0, CSR_SSTATUS, t0
- /* Save Host and Restore Guest HSTATUS */
- csrrw t1, CSR_HSTATUS, t1
-
- /* Save Host and Restore Guest SCOUNTEREN */
- csrrw t2, CSR_SCOUNTEREN, t2
-
/* Save Host STVEC and change it to return path */
- csrrw t4, CSR_STVEC, t4
+ csrrw t1, CSR_STVEC, t1
+
+ /* Restore Guest SEPC */
+ csrw CSR_SEPC, t2
/* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
csrrw t3, CSR_SSCRATCH, a0
- /* Restore Guest SEPC */
- csrw CSR_SEPC, t5
-
/* Store Host CSR values */
REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
- REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
- REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ REG_S t1, (KVM_ARCH_HOST_STVEC)(a0)
REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
- REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
+.endm
+.macro RESTORE_GUEST_GPRS
/* Restore Guest GPRs (except A0) */
REG_L ra, (KVM_ARCH_GUEST_RA)(a0)
REG_L sp, (KVM_ARCH_GUEST_SP)(a0)
@@ -107,13 +97,9 @@ SYM_FUNC_START(__kvm_riscv_switch_to)
/* Restore Guest A0 */
REG_L a0, (KVM_ARCH_GUEST_A0)(a0)
+.endm
- /* Resume Guest */
- sret
-
- /* Back to Host */
- .align 2
-.Lkvm_switch_return:
+.macro SAVE_GUEST_GPRS
/* Swap Guest A0 with SSCRATCH */
csrrw a0, CSR_SSCRATCH, a0
@@ -148,39 +134,33 @@ SYM_FUNC_START(__kvm_riscv_switch_to)
REG_S t4, (KVM_ARCH_GUEST_T4)(a0)
REG_S t5, (KVM_ARCH_GUEST_T5)(a0)
REG_S t6, (KVM_ARCH_GUEST_T6)(a0)
+.endm
+.macro SAVE_GUEST_AND_RESTORE_HOST_CSRS
/* Load Host CSR values */
- REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
- REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
- REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
- REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
- REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
-
- /* Save Guest SEPC */
- csrr t0, CSR_SEPC
+ REG_L t0, (KVM_ARCH_HOST_STVEC)(a0)
+ REG_L t1, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ REG_L t2, (KVM_ARCH_HOST_SSTATUS)(a0)
/* Save Guest A0 and Restore Host SSCRATCH */
- csrrw t2, CSR_SSCRATCH, t2
+ csrrw t1, CSR_SSCRATCH, t1
- /* Restore Host STVEC */
- csrw CSR_STVEC, t1
-
- /* Save Guest and Restore Host SCOUNTEREN */
- csrrw t3, CSR_SCOUNTEREN, t3
+ /* Save Guest SEPC */
+ csrr t3, CSR_SEPC
- /* Save Guest and Restore Host HSTATUS */
- csrrw t4, CSR_HSTATUS, t4
+ /* Restore Host STVEC */
+ csrw CSR_STVEC, t0
/* Save Guest and Restore Host SSTATUS */
- csrrw t5, CSR_SSTATUS, t5
+ csrrw t2, CSR_SSTATUS, t2
/* Store Guest CSR values */
- REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
- REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
- REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
- REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
- REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0)
+ REG_S t1, (KVM_ARCH_GUEST_A0)(a0)
+ REG_S t2, (KVM_ARCH_GUEST_SSTATUS)(a0)
+ REG_S t3, (KVM_ARCH_GUEST_SEPC)(a0)
+.endm
+.macro RESTORE_HOST_GPRS
/* Restore Host GPRs (except A0 and T0-T6) */
REG_L ra, (KVM_ARCH_HOST_RA)(a0)
REG_L sp, (KVM_ARCH_HOST_SP)(a0)
@@ -205,11 +185,68 @@ SYM_FUNC_START(__kvm_riscv_switch_to)
REG_L s9, (KVM_ARCH_HOST_S9)(a0)
REG_L s10, (KVM_ARCH_HOST_S10)(a0)
REG_L s11, (KVM_ARCH_HOST_S11)(a0)
+.endm
+
+ .text
+ .altmacro
+ .option norelax
+
+ /*
+ * Parameters:
+ * A0 <= Pointer to struct kvm_vcpu_arch
+ */
+SYM_FUNC_START(__kvm_riscv_switch_to)
+ SAVE_HOST_GPRS
+
+ SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_switch_return
+
+ RESTORE_GUEST_GPRS
+
+ /* Resume Guest using SRET */
+ sret
+
+ /* Back to Host */
+ .align 2
+.Lkvm_switch_return:
+ SAVE_GUEST_GPRS
+
+ SAVE_GUEST_AND_RESTORE_HOST_CSRS
+
+ RESTORE_HOST_GPRS
/* Return to C code */
ret
SYM_FUNC_END(__kvm_riscv_switch_to)
+ /*
+ * Parameters:
+ * A0 <= Pointer to struct kvm_vcpu_arch
+ * A1 <= SBI extension ID
+ * A2 <= SBI function ID
+ */
+SYM_FUNC_START(__kvm_riscv_nacl_switch_to)
+ SAVE_HOST_GPRS
+
+ SAVE_HOST_AND_RESTORE_GUEST_CSRS .Lkvm_nacl_switch_return
+
+ /* Resume Guest using SBI nested acceleration */
+ add a6, a2, zero
+ add a7, a1, zero
+ ecall
+
+ /* Back to Host */
+ .align 2
+.Lkvm_nacl_switch_return:
+ SAVE_GUEST_GPRS
+
+ SAVE_GUEST_AND_RESTORE_HOST_CSRS
+
+ RESTORE_HOST_GPRS
+
+ /* Return to C code */
+ ret
+SYM_FUNC_END(__kvm_riscv_nacl_switch_to)
+
SYM_CODE_START(__kvm_riscv_unpriv_trap)
/*
* We assume that faulting unpriv load/store instruction is
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index 75486b25ac45..ff672fa71fcc 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -11,8 +11,8 @@
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
#include <clocksource/timer-riscv.h>
-#include <asm/csr.h>
#include <asm/delay.h>
+#include <asm/kvm_nacl.h>
#include <asm/kvm_vcpu_timer.h>
static u64 kvm_riscv_current_cycles(struct kvm_guest_timer *gt)
@@ -72,12 +72,12 @@ static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
static int kvm_riscv_vcpu_update_vstimecmp(struct kvm_vcpu *vcpu, u64 ncycles)
{
#if defined(CONFIG_32BIT)
- csr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF);
- csr_write(CSR_VSTIMECMPH, ncycles >> 32);
+ ncsr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF);
+ ncsr_write(CSR_VSTIMECMPH, ncycles >> 32);
#else
- csr_write(CSR_VSTIMECMP, ncycles);
+ ncsr_write(CSR_VSTIMECMP, ncycles);
#endif
- return 0;
+ return 0;
}
static int kvm_riscv_vcpu_update_hrtimer(struct kvm_vcpu *vcpu, u64 ncycles)
@@ -248,18 +248,19 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
if (t->init_done)
return -EINVAL;
- hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
t->init_done = true;
t->next_set = false;
/* Enable sstc for every vcpu if available in hardware */
if (riscv_isa_extension_available(NULL, SSTC)) {
t->sstc_enabled = true;
- t->hrt.function = kvm_riscv_vcpu_vstimer_expired;
+ hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp;
} else {
t->sstc_enabled = false;
- t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
+ hrtimer_setup(&t->hrt, kvm_riscv_vcpu_hrtimer_expired, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
t->timer_next_event = kvm_riscv_vcpu_update_hrtimer;
}
@@ -289,10 +290,10 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu)
struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
#if defined(CONFIG_32BIT)
- csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta));
- csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32));
+ ncsr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta));
+ ncsr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32));
#else
- csr_write(CSR_HTIMEDELTA, gt->time_delta);
+ ncsr_write(CSR_HTIMEDELTA, gt->time_delta);
#endif
}
@@ -306,10 +307,10 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
return;
#if defined(CONFIG_32BIT)
- csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
- csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
+ ncsr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
+ ncsr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
#else
- csr_write(CSR_VSTIMECMP, t->next_cycles);
+ ncsr_write(CSR_VSTIMECMP, t->next_cycles);
#endif
/* timer should be enabled for the remaining operations */
@@ -327,10 +328,10 @@ void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu)
return;
#if defined(CONFIG_32BIT)
- t->next_cycles = csr_read(CSR_VSTIMECMP);
- t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32;
+ t->next_cycles = ncsr_read(CSR_VSTIMECMP);
+ t->next_cycles |= (u64)ncsr_read(CSR_VSTIMECMPH) << 32;
#else
- t->next_cycles = csr_read(CSR_VSTIMECMP);
+ t->next_cycles = ncsr_read(CSR_VSTIMECMP);
#endif
}
diff --git a/arch/riscv/kvm/vcpu_vector.c b/arch/riscv/kvm/vcpu_vector.c
index d92d1348045c..a5f88cb717f3 100644
--- a/arch/riscv/kvm/vcpu_vector.c
+++ b/arch/riscv/kvm/vcpu_vector.c
@@ -22,6 +22,9 @@ void kvm_riscv_vcpu_vector_reset(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
cntx->sstatus &= ~SR_VS;
+
+ cntx->vector.vlenb = riscv_v_vsize / 32;
+
if (riscv_isa_extension_available(isa, v)) {
cntx->sstatus |= SR_VS_INITIAL;
WARN_ON(!cntx->vector.datap);
@@ -70,13 +73,11 @@ void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx)
__kvm_riscv_vector_restore(cntx);
}
-int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
- struct kvm_cpu_context *cntx)
+int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu)
{
- cntx->vector.datap = kmalloc(riscv_v_vsize, GFP_KERNEL);
- if (!cntx->vector.datap)
+ vcpu->arch.guest_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
+ if (!vcpu->arch.guest_context.vector.datap)
return -ENOMEM;
- cntx->vector.vlenb = riscv_v_vsize / 32;
vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL);
if (!vcpu->arch.host_context.vector.datap)
@@ -87,7 +88,7 @@ int kvm_riscv_vcpu_alloc_vector_context(struct kvm_vcpu *vcpu,
void kvm_riscv_vcpu_free_vector_context(struct kvm_vcpu *vcpu)
{
- kfree(vcpu->arch.guest_reset_context.vector.datap);
+ kfree(vcpu->arch.guest_context.vector.datap);
kfree(vcpu->arch.host_context.vector.datap);
}
#endif
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index ce58bc48e5b8..b27ec8f96697 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_READONLY_MEM:
case KVM_CAP_MP_STATE:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
@@ -208,6 +209,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
return r;
}
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+ switch (cap->cap) {
+ case KVM_CAP_RISCV_MP_STATE_RESET:
+ if (cap->flags)
+ return -EINVAL;
+ kvm->arch.mp_state_reset = true;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
return -EINVAL;