aboutsummaryrefslogtreecommitdiffstats
path: root/arch/riscv/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv/kvm')
-rw-r--r--arch/riscv/kvm/Kconfig1
-rw-r--r--arch/riscv/kvm/Makefile11
-rw-r--r--arch/riscv/kvm/main.c21
-rw-r--r--arch/riscv/kvm/mmu.c375
-rw-r--r--arch/riscv/kvm/tlb.S74
-rw-r--r--arch/riscv/kvm/tlb.c402
-rw-r--r--arch/riscv/kvm/vcpu.c413
-rw-r--r--arch/riscv/kvm/vcpu_exit.c531
-rw-r--r--arch/riscv/kvm/vcpu_fp.c30
-rw-r--r--arch/riscv/kvm/vcpu_insn.c751
-rw-r--r--arch/riscv/kvm/vcpu_sbi.c223
-rw-r--r--arch/riscv/kvm/vcpu_sbi_base.c100
-rw-r--r--arch/riscv/kvm/vcpu_sbi_hsm.c119
-rw-r--r--arch/riscv/kvm/vcpu_sbi_replace.c174
-rw-r--r--arch/riscv/kvm/vcpu_sbi_v01.c118
-rw-r--r--arch/riscv/kvm/vcpu_switch.S60
-rw-r--r--arch/riscv/kvm/vcpu_timer.c157
-rw-r--r--arch/riscv/kvm/vm.c23
-rw-r--r--arch/riscv/kvm/vmid.c36
19 files changed, 2562 insertions, 1057 deletions
diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig
index f5a342fa1b1d..f36a737d5f96 100644
--- a/arch/riscv/kvm/Kconfig
+++ b/arch/riscv/kvm/Kconfig
@@ -24,6 +24,7 @@ config KVM
select PREEMPT_NOTIFIERS
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select KVM_XFER_TO_GUEST_WORK
select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_EVENTFD
select SRCU
diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile
index 30cdd1df0098..019df9208bdd 100644
--- a/arch/riscv/kvm/Makefile
+++ b/arch/riscv/kvm/Makefile
@@ -5,14 +5,10 @@
ccflags-y += -I $(srctree)/$(src)
-KVM := ../../../virt/kvm
+include $(srctree)/virt/kvm/Makefile.kvm
obj-$(CONFIG_KVM) += kvm.o
-kvm-y += $(KVM)/kvm_main.o
-kvm-y += $(KVM)/coalesced_mmio.o
-kvm-y += $(KVM)/binary_stats.o
-kvm-y += $(KVM)/eventfd.o
kvm-y += main.o
kvm-y += vm.o
kvm-y += vmid.o
@@ -21,6 +17,11 @@ kvm-y += mmu.o
kvm-y += vcpu.o
kvm-y += vcpu_exit.o
kvm-y += vcpu_fp.o
+kvm-y += vcpu_insn.o
kvm-y += vcpu_switch.o
kvm-y += vcpu_sbi.o
+kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
+kvm-y += vcpu_sbi_base.o
+kvm-y += vcpu_sbi_replace.o
+kvm-y += vcpu_sbi_hsm.o
kvm-y += vcpu_timer.o
diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c
index 421ecf4e6360..df2d8716851f 100644
--- a/arch/riscv/kvm/main.c
+++ b/arch/riscv/kvm/main.c
@@ -58,6 +58,14 @@ int kvm_arch_hardware_enable(void)
void kvm_arch_hardware_disable(void)
{
+ /*
+ * After clearing the hideleg CSR, the host kernel will receive
+ * spurious interrupts if hvip CSR has pending interrupts and the
+ * corresponding enable bits in vsie CSR are asserted. To avoid it,
+ * hvip CSR and vsie CSR must be cleared before clearing hideleg CSR.
+ */
+ csr_write(CSR_VSIE, 0);
+ csr_write(CSR_HVIP, 0);
csr_write(CSR_HEDELEG, 0);
csr_write(CSR_HIDELEG, 0);
}
@@ -81,13 +89,13 @@ int kvm_arch_init(void *opaque)
return -ENODEV;
}
- kvm_riscv_stage2_mode_detect();
+ kvm_riscv_gstage_mode_detect();
- kvm_riscv_stage2_vmid_detect();
+ kvm_riscv_gstage_vmid_detect();
kvm_info("hypervisor extension available\n");
- switch (kvm_riscv_stage2_mode()) {
+ switch (kvm_riscv_gstage_mode()) {
case HGATP_MODE_SV32X4:
str = "Sv32x4";
break;
@@ -97,12 +105,15 @@ int kvm_arch_init(void *opaque)
case HGATP_MODE_SV48X4:
str = "Sv48x4";
break;
+ case HGATP_MODE_SV57X4:
+ str = "Sv57x4";
+ break;
default:
return -ENODEV;
}
kvm_info("using %s G-stage page table format\n", str);
- kvm_info("VMID %ld bits available\n", kvm_riscv_stage2_vmid_bits());
+ kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits());
return 0;
}
@@ -111,7 +122,7 @@ void kvm_arch_exit(void)
{
}
-static int riscv_kvm_init(void)
+static int __init riscv_kvm_init(void)
{
return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c
index fc058ff5f4b6..3620ecac2fa1 100644
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@ -18,53 +18,52 @@
#include <asm/csr.h>
#include <asm/page.h>
#include <asm/pgtable.h>
-#include <asm/sbi.h>
#ifdef CONFIG_64BIT
-static unsigned long stage2_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
-static unsigned long stage2_pgd_levels = 3;
-#define stage2_index_bits 9
+static unsigned long gstage_mode = (HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT);
+static unsigned long gstage_pgd_levels = 3;
+#define gstage_index_bits 9
#else
-static unsigned long stage2_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
-static unsigned long stage2_pgd_levels = 2;
-#define stage2_index_bits 10
+static unsigned long gstage_mode = (HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT);
+static unsigned long gstage_pgd_levels = 2;
+#define gstage_index_bits 10
#endif
-#define stage2_pgd_xbits 2
-#define stage2_pgd_size (1UL << (HGATP_PAGE_SHIFT + stage2_pgd_xbits))
-#define stage2_gpa_bits (HGATP_PAGE_SHIFT + \
- (stage2_pgd_levels * stage2_index_bits) + \
- stage2_pgd_xbits)
-#define stage2_gpa_size ((gpa_t)(1ULL << stage2_gpa_bits))
+#define gstage_pgd_xbits 2
+#define gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + gstage_pgd_xbits))
+#define gstage_gpa_bits (HGATP_PAGE_SHIFT + \
+ (gstage_pgd_levels * gstage_index_bits) + \
+ gstage_pgd_xbits)
+#define gstage_gpa_size ((gpa_t)(1ULL << gstage_gpa_bits))
-#define stage2_pte_leaf(__ptep) \
+#define gstage_pte_leaf(__ptep) \
(pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC))
-static inline unsigned long stage2_pte_index(gpa_t addr, u32 level)
+static inline unsigned long gstage_pte_index(gpa_t addr, u32 level)
{
unsigned long mask;
- unsigned long shift = HGATP_PAGE_SHIFT + (stage2_index_bits * level);
+ unsigned long shift = HGATP_PAGE_SHIFT + (gstage_index_bits * level);
- if (level == (stage2_pgd_levels - 1))
- mask = (PTRS_PER_PTE * (1UL << stage2_pgd_xbits)) - 1;
+ if (level == (gstage_pgd_levels - 1))
+ mask = (PTRS_PER_PTE * (1UL << gstage_pgd_xbits)) - 1;
else
mask = PTRS_PER_PTE - 1;
return (addr >> shift) & mask;
}
-static inline unsigned long stage2_pte_page_vaddr(pte_t pte)
+static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
{
- return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
+ return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
}
-static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
+static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
{
u32 i;
unsigned long psz = 1UL << 12;
- for (i = 0; i < stage2_pgd_levels; i++) {
- if (page_size == (psz << (i * stage2_index_bits))) {
+ for (i = 0; i < gstage_pgd_levels; i++) {
+ if (page_size == (psz << (i * gstage_index_bits))) {
*out_level = i;
return 0;
}
@@ -73,64 +72,39 @@ static int stage2_page_size_to_level(unsigned long page_size, u32 *out_level)
return -EINVAL;
}
-static int stage2_level_to_page_size(u32 level, unsigned long *out_pgsize)
+static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder)
{
- if (stage2_pgd_levels < level)
+ if (gstage_pgd_levels < level)
return -EINVAL;
- *out_pgsize = 1UL << (12 + (level * stage2_index_bits));
-
+ *out_pgorder = 12 + (level * gstage_index_bits);
return 0;
}
-static int stage2_cache_topup(struct kvm_mmu_page_cache *pcache,
- int min, int max)
+static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize)
{
- void *page;
+ int rc;
+ unsigned long page_order = PAGE_SHIFT;
- BUG_ON(max > KVM_MMU_PAGE_CACHE_NR_OBJS);
- if (pcache->nobjs >= min)
- return 0;
- while (pcache->nobjs < max) {
- page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- if (!page)
- return -ENOMEM;
- pcache->objects[pcache->nobjs++] = page;
- }
+ rc = gstage_level_to_page_order(level, &page_order);
+ if (rc)
+ return rc;
+ *out_pgsize = BIT(page_order);
return 0;
}
-static void stage2_cache_flush(struct kvm_mmu_page_cache *pcache)
-{
- while (pcache && pcache->nobjs)
- free_page((unsigned long)pcache->objects[--pcache->nobjs]);
-}
-
-static void *stage2_cache_alloc(struct kvm_mmu_page_cache *pcache)
-{
- void *p;
-
- if (!pcache)
- return NULL;
-
- BUG_ON(!pcache->nobjs);
- p = pcache->objects[--pcache->nobjs];
-
- return p;
-}
-
-static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
+static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr,
pte_t **ptepp, u32 *ptep_level)
{
pte_t *ptep;
- u32 current_level = stage2_pgd_levels - 1;
+ u32 current_level = gstage_pgd_levels - 1;
*ptep_level = current_level;
ptep = (pte_t *)kvm->arch.pgd;
- ptep = &ptep[stage2_pte_index(addr, current_level)];
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
while (ptep && pte_val(*ptep)) {
- if (stage2_pte_leaf(ptep)) {
+ if (gstage_pte_leaf(ptep)) {
*ptep_level = current_level;
*ptepp = ptep;
return true;
@@ -139,8 +113,8 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
if (current_level) {
current_level--;
*ptep_level = current_level;
- ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
- ptep = &ptep[stage2_pte_index(addr, current_level)];
+ ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
+ ptep = &ptep[gstage_pte_index(addr, current_level)];
} else {
ptep = NULL;
}
@@ -149,67 +123,59 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr,
return false;
}
-static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
+static void gstage_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr)
{
- struct cpumask hmask;
- unsigned long size = PAGE_SIZE;
- struct kvm_vmid *vmid = &kvm->arch.vmid;
+ unsigned long order = PAGE_SHIFT;
- if (stage2_level_to_page_size(level, &size))
+ if (gstage_level_to_page_order(level, &order))
return;
- addr &= ~(size - 1);
+ addr &= ~(BIT(order) - 1);
- /*
- * TODO: Instead of cpu_online_mask, we should only target CPUs
- * where the Guest/VM is running.
- */
- preempt_disable();
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size,
- READ_ONCE(vmid->vmid));
- preempt_enable();
+ kvm_riscv_hfence_gvma_vmid_gpa(kvm, -1UL, 0, addr, BIT(order), order);
}
-static int stage2_set_pte(struct kvm *kvm, u32 level,
- struct kvm_mmu_page_cache *pcache,
+static int gstage_set_pte(struct kvm *kvm, u32 level,
+ struct kvm_mmu_memory_cache *pcache,
gpa_t addr, const pte_t *new_pte)
{
- u32 current_level = stage2_pgd_levels - 1;
+ u32 current_level = gstage_pgd_levels - 1;
pte_t *next_ptep = (pte_t *)kvm->arch.pgd;
- pte_t *ptep = &next_ptep[stage2_pte_index(addr, current_level)];
+ pte_t *ptep = &next_ptep[gstage_pte_index(addr, current_level)];
if (current_level < level)
return -EINVAL;
while (current_level != level) {
- if (stage2_pte_leaf(ptep))
+ if (gstage_pte_leaf(ptep))
return -EEXIST;
if (!pte_val(*ptep)) {
- next_ptep = stage2_cache_alloc(pcache);
+ if (!pcache)
+ return -ENOMEM;
+ next_ptep = kvm_mmu_memory_cache_alloc(pcache);
if (!next_ptep)
return -ENOMEM;
*ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)),
__pgprot(_PAGE_TABLE));
} else {
- if (stage2_pte_leaf(ptep))
+ if (gstage_pte_leaf(ptep))
return -EEXIST;
- next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
}
current_level--;
- ptep = &next_ptep[stage2_pte_index(addr, current_level)];
+ ptep = &next_ptep[gstage_pte_index(addr, current_level)];
}
*ptep = *new_pte;
- if (stage2_pte_leaf(ptep))
- stage2_remote_tlb_flush(kvm, current_level, addr);
+ if (gstage_pte_leaf(ptep))
+ gstage_remote_tlb_flush(kvm, current_level, addr);
return 0;
}
-static int stage2_map_page(struct kvm *kvm,
- struct kvm_mmu_page_cache *pcache,
+static int gstage_map_page(struct kvm *kvm,
+ struct kvm_mmu_memory_cache *pcache,
gpa_t gpa, phys_addr_t hpa,
unsigned long page_size,
bool page_rdonly, bool page_exec)
@@ -219,7 +185,7 @@ static int stage2_map_page(struct kvm *kvm,
pte_t new_pte;
pgprot_t prot;
- ret = stage2_page_size_to_level(page_size, &level);
+ ret = gstage_page_size_to_level(page_size, &level);
if (ret)
return ret;
@@ -230,9 +196,9 @@ static int stage2_map_page(struct kvm *kvm,
* PTE so that software can update these bits.
*
* We support both options mentioned above. To achieve this, we
- * always set 'A' and 'D' PTE bits at time of creating stage2
+ * always set 'A' and 'D' PTE bits at time of creating G-stage
* mapping. To support KVM dirty page logging with both options
- * mentioned above, we will write-protect stage2 PTEs to track
+ * mentioned above, we will write-protect G-stage PTEs to track
* dirty pages.
*/
@@ -250,24 +216,24 @@ static int stage2_map_page(struct kvm *kvm,
new_pte = pfn_pte(PFN_DOWN(hpa), prot);
new_pte = pte_mkdirty(new_pte);
- return stage2_set_pte(kvm, level, pcache, gpa, &new_pte);
+ return gstage_set_pte(kvm, level, pcache, gpa, &new_pte);
}
-enum stage2_op {
- STAGE2_OP_NOP = 0, /* Nothing */
- STAGE2_OP_CLEAR, /* Clear/Unmap */
- STAGE2_OP_WP, /* Write-protect */
+enum gstage_op {
+ GSTAGE_OP_NOP = 0, /* Nothing */
+ GSTAGE_OP_CLEAR, /* Clear/Unmap */
+ GSTAGE_OP_WP, /* Write-protect */
};
-static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
- pte_t *ptep, u32 ptep_level, enum stage2_op op)
+static void gstage_op_pte(struct kvm *kvm, gpa_t addr,
+ pte_t *ptep, u32 ptep_level, enum gstage_op op)
{
int i, ret;
pte_t *next_ptep;
u32 next_ptep_level;
unsigned long next_page_size, page_size;
- ret = stage2_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
if (ret)
return;
@@ -276,31 +242,31 @@ static void stage2_op_pte(struct kvm *kvm, gpa_t addr,
if (!pte_val(*ptep))
return;
- if (ptep_level && !stage2_pte_leaf(ptep)) {
- next_ptep = (pte_t *)stage2_pte_page_vaddr(*ptep);
+ if (ptep_level && !gstage_pte_leaf(ptep)) {
+ next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep);
next_ptep_level = ptep_level - 1;
- ret = stage2_level_to_page_size(next_ptep_level,
+ ret = gstage_level_to_page_size(next_ptep_level,
&next_page_size);
if (ret)
return;
- if (op == STAGE2_OP_CLEAR)
+ if (op == GSTAGE_OP_CLEAR)
set_pte(ptep, __pte(0));
for (i = 0; i < PTRS_PER_PTE; i++)
- stage2_op_pte(kvm, addr + i * next_page_size,
+ gstage_op_pte(kvm, addr + i * next_page_size,
&next_ptep[i], next_ptep_level, op);
- if (op == STAGE2_OP_CLEAR)
+ if (op == GSTAGE_OP_CLEAR)
put_page(virt_to_page(next_ptep));
} else {
- if (op == STAGE2_OP_CLEAR)
+ if (op == GSTAGE_OP_CLEAR)
set_pte(ptep, __pte(0));
- else if (op == STAGE2_OP_WP)
+ else if (op == GSTAGE_OP_WP)
set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE));
- stage2_remote_tlb_flush(kvm, ptep_level, addr);
+ gstage_remote_tlb_flush(kvm, ptep_level, addr);
}
}
-static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
+static void gstage_unmap_range(struct kvm *kvm, gpa_t start,
gpa_t size, bool may_block)
{
int ret;
@@ -311,9 +277,9 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
gpa_t addr = start, end = start + size;
while (addr < end) {
- found_leaf = stage2_get_leaf_entry(kvm, addr,
+ found_leaf = gstage_get_leaf_entry(kvm, addr,
&ptep, &ptep_level);
- ret = stage2_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
if (ret)
break;
@@ -321,8 +287,8 @@ static void stage2_unmap_range(struct kvm *kvm, gpa_t start,
goto next;
if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
- stage2_op_pte(kvm, addr, ptep,
- ptep_level, STAGE2_OP_CLEAR);
+ gstage_op_pte(kvm, addr, ptep,
+ ptep_level, GSTAGE_OP_CLEAR);
next:
addr += page_size;
@@ -336,7 +302,7 @@ next:
}
}
-static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
+static void gstage_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
{
int ret;
pte_t *ptep;
@@ -346,9 +312,9 @@ static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
unsigned long page_size;
while (addr < end) {
- found_leaf = stage2_get_leaf_entry(kvm, addr,
+ found_leaf = gstage_get_leaf_entry(kvm, addr,
&ptep, &ptep_level);
- ret = stage2_level_to_page_size(ptep_level, &page_size);
+ ret = gstage_level_to_page_size(ptep_level, &page_size);
if (ret)
break;
@@ -356,15 +322,15 @@ static void stage2_wp_range(struct kvm *kvm, gpa_t start, gpa_t end)
goto next;
if (!(addr & (page_size - 1)) && ((end - addr) >= page_size))
- stage2_op_pte(kvm, addr, ptep,
- ptep_level, STAGE2_OP_WP);
+ gstage_op_pte(kvm, addr, ptep,
+ ptep_level, GSTAGE_OP_WP);
next:
addr += page_size;
}
}
-static void stage2_wp_memory_region(struct kvm *kvm, int slot)
+static void gstage_wp_memory_region(struct kvm *kvm, int slot)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
@@ -372,37 +338,39 @@ static void stage2_wp_memory_region(struct kvm *kvm, int slot)
phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- stage2_wp_range(kvm, start, end);
+ gstage_wp_range(kvm, start, end);
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
-static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
- unsigned long size, bool writable)
+int kvm_riscv_gstage_ioremap(struct kvm *kvm, gpa_t gpa,
+ phys_addr_t hpa, unsigned long size,
+ bool writable, bool in_atomic)
{
pte_t pte;
int ret = 0;
unsigned long pfn;
phys_addr_t addr, end;
- struct kvm_mmu_page_cache pcache = { 0, };
+ struct kvm_mmu_memory_cache pcache = {
+ .gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0,
+ .gfp_zero = __GFP_ZERO,
+ };
end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
pfn = __phys_to_pfn(hpa);
for (addr = gpa; addr < end; addr += PAGE_SIZE) {
- pte = pfn_pte(pfn, PAGE_KERNEL);
+ pte = pfn_pte(pfn, PAGE_KERNEL_IO);
if (!writable)
pte = pte_wrprotect(pte);
- ret = stage2_cache_topup(&pcache,
- stage2_pgd_levels,
- KVM_MMU_PAGE_CACHE_NR_OBJS);
+ ret = kvm_mmu_topup_memory_cache(&pcache, gstage_pgd_levels);
if (ret)
goto out;
spin_lock(&kvm->mmu_lock);
- ret = stage2_set_pte(kvm, 0, &pcache, addr, &pte);
+ ret = gstage_set_pte(kvm, 0, &pcache, addr, &pte);
spin_unlock(&kvm->mmu_lock);
if (ret)
goto out;
@@ -411,10 +379,17 @@ static int stage2_ioremap(struct kvm *kvm, gpa_t gpa, phys_addr_t hpa,
}
out:
- stage2_cache_flush(&pcache);
+ kvm_mmu_free_memory_cache(&pcache);
return ret;
}
+void kvm_riscv_gstage_iounmap(struct kvm *kvm, gpa_t gpa, unsigned long size)
+{
+ spin_lock(&kvm->mmu_lock);
+ gstage_unmap_range(kvm, gpa, size, false);
+ spin_unlock(&kvm->mmu_lock);
+}
+
void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset,
@@ -424,7 +399,7 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT;
phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT;
- stage2_wp_range(kvm, start, end);
+ gstage_wp_range(kvm, start, end);
}
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
@@ -447,7 +422,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
- kvm_riscv_stage2_free_pgd(kvm);
+ kvm_riscv_gstage_free_pgd(kvm);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -457,12 +432,11 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
phys_addr_t size = slot->npages << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
- stage2_unmap_range(kvm, gpa, size, false);
+ gstage_unmap_range(kvm, gpa, size, false);
spin_unlock(&kvm->mmu_lock);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
@@ -472,18 +446,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
* allocated dirty_bitmap[], dirty pages will be tracked while
* the memory slot is write protected.
*/
- if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES)
- stage2_wp_memory_region(kvm, mem->slot);
+ if (change != KVM_MR_DELETE && new->flags & KVM_MEM_LOG_DIRTY_PAGES)
+ gstage_wp_memory_region(kvm, new->id);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- const struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- hva_t hva = mem->userspace_addr;
- hva_t reg_end = hva + mem->memory_size;
- bool writable = !(mem->flags & KVM_MEM_READONLY);
+ hva_t hva, reg_end, size;
+ gpa_t base_gpa;
+ bool writable;
int ret = 0;
if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -494,10 +468,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the GPA
* space addressable by the KVM guest GPA space.
*/
- if ((memslot->base_gfn + memslot->npages) >=
- (stage2_gpa_size >> PAGE_SHIFT))
+ if ((new->base_gfn + new->npages) >=
+ (gstage_gpa_size >> PAGE_SHIFT))
return -EFAULT;
+ hva = new->userspace_addr;
+ size = new->npages << PAGE_SHIFT;
+ reg_end = hva + size;
+ base_gpa = new->base_gfn << PAGE_SHIFT;
+ writable = !(new->flags & KVM_MEM_READONLY);
+
mmap_read_lock(current->mm);
/*
@@ -533,21 +513,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
vm_end = min(reg_end, vma->vm_end);
if (vma->vm_flags & VM_PFNMAP) {
- gpa_t gpa = mem->guest_phys_addr +
- (vm_start - mem->userspace_addr);
+ gpa_t gpa = base_gpa + (vm_start - hva);
phys_addr_t pa;
pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */
- if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}
- ret = stage2_ioremap(kvm, gpa, pa,
- vm_end - vm_start, writable);
+ ret = kvm_riscv_gstage_ioremap(kvm, gpa, pa,
+ vm_end - vm_start,
+ writable, false);
if (ret)
break;
}
@@ -559,8 +539,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
spin_lock(&kvm->mmu_lock);
if (ret)
- stage2_unmap_range(kvm, mem->guest_phys_addr,
- mem->memory_size, false);
+ gstage_unmap_range(kvm, base_gpa, size, false);
spin_unlock(&kvm->mmu_lock);
out:
@@ -573,7 +552,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
if (!kvm->arch.pgd)
return false;
- stage2_unmap_range(kvm, range->start << PAGE_SHIFT,
+ gstage_unmap_range(kvm, range->start << PAGE_SHIFT,
(range->end - range->start) << PAGE_SHIFT,
range->may_block);
return false;
@@ -589,10 +568,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(range->end - range->start != 1);
- ret = stage2_map_page(kvm, NULL, range->start << PAGE_SHIFT,
+ ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT,
__pfn_to_phys(pfn), PAGE_SIZE, true, true);
if (ret) {
- kvm_debug("Failed to map stage2 page (error %d)\n", ret);
+ kvm_debug("Failed to map G-stage page (error %d)\n", ret);
return true;
}
@@ -610,7 +589,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
- if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
@@ -628,25 +607,25 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PGDIR_SIZE);
- if (!stage2_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
+ if (!gstage_get_leaf_entry(kvm, range->start << PAGE_SHIFT,
&ptep, &ptep_level))
return false;
return pte_young(*ptep);
}
-int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
+int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot,
gpa_t gpa, unsigned long hva, bool is_write)
{
int ret;
kvm_pfn_t hfn;
- bool writeable;
+ bool writable;
short vma_pageshift;
gfn_t gfn = gpa >> PAGE_SHIFT;
struct vm_area_struct *vma;
struct kvm *kvm = vcpu->kvm;
- struct kvm_mmu_page_cache *pcache = &vcpu->arch.mmu_page_cache;
+ struct kvm_mmu_memory_cache *pcache = &vcpu->arch.mmu_page_cache;
bool logging = (memslot->dirty_bitmap &&
!(memslot->flags & KVM_MEM_READONLY)) ? true : false;
unsigned long vma_pagesize, mmu_seq;
@@ -681,16 +660,15 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
}
/* We need minimum second+third level pages */
- ret = stage2_cache_topup(pcache, stage2_pgd_levels,
- KVM_MMU_PAGE_CACHE_NR_OBJS);
+ ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels);
if (ret) {
- kvm_err("Failed to topup stage2 cache\n");
+ kvm_err("Failed to topup G-stage cache\n");
return ret;
}
- mmu_seq = kvm->mmu_notifier_seq;
+ mmu_seq = kvm->mmu_invalidate_seq;
- hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writeable);
+ hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable);
if (hfn == KVM_PFN_ERR_HWPOISON) {
send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
vma_pageshift, current);
@@ -704,25 +682,25 @@ int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu,
* for write faults.
*/
if (logging && !is_write)
- writeable = false;
+ writable = false;
spin_lock(&kvm->mmu_lock);
- if (mmu_notifier_retry(kvm, mmu_seq))
+ if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
- if (writeable) {
+ if (writable) {
kvm_set_pfn_dirty(hfn);
mark_page_dirty(kvm, gfn);
- ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
vma_pagesize, false, true);
} else {
- ret = stage2_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
+ ret = gstage_map_page(kvm, pcache, gpa, hfn << PAGE_SHIFT,
vma_pagesize, true, true);
}
if (ret)
- kvm_err("Failed to map in stage2\n");
+ kvm_err("Failed to map in G-stage\n");
out_unlock:
spin_unlock(&kvm->mmu_lock);
@@ -731,12 +709,7 @@ out_unlock:
return ret;
}
-void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu)
-{
- stage2_cache_flush(&vcpu->arch.mmu_page_cache);
-}
-
-int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
+int kvm_riscv_gstage_alloc_pgd(struct kvm *kvm)
{
struct page *pgd_page;
@@ -746,7 +719,7 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
}
pgd_page = alloc_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(stage2_pgd_size));
+ get_order(gstage_pgd_size));
if (!pgd_page)
return -ENOMEM;
kvm->arch.pgd = page_to_virt(pgd_page);
@@ -755,13 +728,13 @@ int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm)
return 0;
}
-void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
+void kvm_riscv_gstage_free_pgd(struct kvm *kvm)
{
void *pgd = NULL;
spin_lock(&kvm->mmu_lock);
if (kvm->arch.pgd) {
- stage2_unmap_range(kvm, 0UL, stage2_gpa_size, false);
+ gstage_unmap_range(kvm, 0UL, gstage_gpa_size, false);
pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
kvm->arch.pgd_phys = 0;
@@ -769,12 +742,12 @@ void kvm_riscv_stage2_free_pgd(struct kvm *kvm)
spin_unlock(&kvm->mmu_lock);
if (pgd)
- free_pages((unsigned long)pgd, get_order(stage2_pgd_size));
+ free_pages((unsigned long)pgd, get_order(gstage_pgd_size));
}
-void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu)
+void kvm_riscv_gstage_update_hgatp(struct kvm_vcpu *vcpu)
{
- unsigned long hgatp = stage2_mode;
+ unsigned long hgatp = gstage_mode;
struct kvm_arch *k = &vcpu->kvm->arch;
hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) &
@@ -783,26 +756,40 @@ void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu)
csr_write(CSR_HGATP, hgatp);
- if (!kvm_riscv_stage2_vmid_bits())
- __kvm_riscv_hfence_gvma_all();
+ if (!kvm_riscv_gstage_vmid_bits())
+ kvm_riscv_local_hfence_gvma_all();
}
-void kvm_riscv_stage2_mode_detect(void)
+void kvm_riscv_gstage_mode_detect(void)
{
#ifdef CONFIG_64BIT
- /* Try Sv48x4 stage2 mode */
+ /* Try Sv57x4 G-stage mode */
+ csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
+ if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) {
+ gstage_mode = (HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT);
+ gstage_pgd_levels = 5;
+ goto skip_sv48x4_test;
+ }
+
+ /* Try Sv48x4 G-stage mode */
csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) {
- stage2_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
- stage2_pgd_levels = 4;
+ gstage_mode = (HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT);
+ gstage_pgd_levels = 4;
}
- csr_write(CSR_HGATP, 0);
+skip_sv48x4_test:
- __kvm_riscv_hfence_gvma_all();
+ csr_write(CSR_HGATP, 0);
+ kvm_riscv_local_hfence_gvma_all();
#endif
}
-unsigned long kvm_riscv_stage2_mode(void)
+unsigned long kvm_riscv_gstage_mode(void)
+{
+ return gstage_mode >> HGATP_MODE_SHIFT;
+}
+
+int kvm_riscv_gstage_gpa_bits(void)
{
- return stage2_mode >> HGATP_MODE_SHIFT;
+ return gstage_gpa_bits;
}
diff --git a/arch/riscv/kvm/tlb.S b/arch/riscv/kvm/tlb.S
deleted file mode 100644
index 899f75d60bad..000000000000
--- a/arch/riscv/kvm/tlb.S
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2019 Western Digital Corporation or its affiliates.
- *
- * Authors:
- * Anup Patel <anup.patel@wdc.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/asm.h>
-
- .text
- .altmacro
- .option norelax
-
- /*
- * Instruction encoding of hfence.gvma is:
- * HFENCE.GVMA rs1, rs2
- * HFENCE.GVMA zero, rs2
- * HFENCE.GVMA rs1
- * HFENCE.GVMA
- *
- * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2
- * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2
- * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1
- * rs1==zero and rs2==zero ==> HFENCE.GVMA
- *
- * Instruction encoding of HFENCE.GVMA is:
- * 0110001 rs2(5) rs1(5) 000 00000 1110011
- */
-
-ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa)
- /*
- * rs1 = a0 (GPA >> 2)
- * rs2 = a1 (VMID)
- * HFENCE.GVMA a0, a1
- * 0110001 01011 01010 000 00000 1110011
- */
- .word 0x62b50073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa)
-
-ENTRY(__kvm_riscv_hfence_gvma_vmid)
- /*
- * rs1 = zero
- * rs2 = a0 (VMID)
- * HFENCE.GVMA zero, a0
- * 0110001 01010 00000 000 00000 1110011
- */
- .word 0x62a00073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_vmid)
-
-ENTRY(__kvm_riscv_hfence_gvma_gpa)
- /*
- * rs1 = a0 (GPA >> 2)
- * rs2 = zero
- * HFENCE.GVMA a0
- * 0110001 00000 01010 000 00000 1110011
- */
- .word 0x62050073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_gpa)
-
-ENTRY(__kvm_riscv_hfence_gvma_all)
- /*
- * rs1 = zero
- * rs2 = zero
- * HFENCE.GVMA
- * 0110001 00000 00000 000 00000 1110011
- */
- .word 0x62000073
- ret
-ENDPROC(__kvm_riscv_hfence_gvma_all)
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
new file mode 100644
index 000000000000..309d79b3e5cd
--- /dev/null
+++ b/arch/riscv/kvm/tlb.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/csr.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>
+
+#define has_svinval() \
+ static_branch_unlikely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_SVINVAL])
+
+void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ gpa_t pos;
+
+ if (PTRS_PER_PTE < (gpsz >> order)) {
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+ return;
+ }
+
+ if (has_svinval()) {
+ asm volatile (SFENCE_W_INVAL() ::: "memory");
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+ asm volatile (HINVAL_GVMA(%0, %1)
+ : : "r" (pos >> 2), "r" (vmid) : "memory");
+ asm volatile (SFENCE_INVAL_IR() ::: "memory");
+ } else {
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+ asm volatile (HFENCE_GVMA(%0, %1)
+ : : "r" (pos >> 2), "r" (vmid) : "memory");
+ }
+}
+
+void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid)
+{
+ asm volatile(HFENCE_GVMA(zero, %0) : : "r" (vmid) : "memory");
+}
+
+void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ gpa_t pos;
+
+ if (PTRS_PER_PTE < (gpsz >> order)) {
+ kvm_riscv_local_hfence_gvma_all();
+ return;
+ }
+
+ if (has_svinval()) {
+ asm volatile (SFENCE_W_INVAL() ::: "memory");
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+ asm volatile(HINVAL_GVMA(%0, zero)
+ : : "r" (pos >> 2) : "memory");
+ asm volatile (SFENCE_INVAL_IR() ::: "memory");
+ } else {
+ for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order))
+ asm volatile(HFENCE_GVMA(%0, zero)
+ : : "r" (pos >> 2) : "memory");
+ }
+}
+
+void kvm_riscv_local_hfence_gvma_all(void)
+{
+ asm volatile(HFENCE_GVMA(zero, zero) : : : "memory");
+}
+
+void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid,
+ unsigned long asid,
+ unsigned long gva,
+ unsigned long gvsz,
+ unsigned long order)
+{
+ unsigned long pos, hgatp;
+
+ if (PTRS_PER_PTE < (gvsz >> order)) {
+ kvm_riscv_local_hfence_vvma_asid_all(vmid, asid);
+ return;
+ }
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ if (has_svinval()) {
+ asm volatile (SFENCE_W_INVAL() ::: "memory");
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+ asm volatile(HINVAL_VVMA(%0, %1)
+ : : "r" (pos), "r" (asid) : "memory");
+ asm volatile (SFENCE_INVAL_IR() ::: "memory");
+ } else {
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+ asm volatile(HFENCE_VVMA(%0, %1)
+ : : "r" (pos), "r" (asid) : "memory");
+ }
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid,
+ unsigned long asid)
+{
+ unsigned long hgatp;
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ asm volatile(HFENCE_VVMA(zero, %0) : : "r" (asid) : "memory");
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order)
+{
+ unsigned long pos, hgatp;
+
+ if (PTRS_PER_PTE < (gvsz >> order)) {
+ kvm_riscv_local_hfence_vvma_all(vmid);
+ return;
+ }
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ if (has_svinval()) {
+ asm volatile (SFENCE_W_INVAL() ::: "memory");
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+ asm volatile(HINVAL_VVMA(%0, zero)
+ : : "r" (pos) : "memory");
+ asm volatile (SFENCE_INVAL_IR() ::: "memory");
+ } else {
+ for (pos = gva; pos < (gva + gvsz); pos += BIT(order))
+ asm volatile(HFENCE_VVMA(%0, zero)
+ : : "r" (pos) : "memory");
+ }
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_hfence_vvma_all(unsigned long vmid)
+{
+ unsigned long hgatp;
+
+ hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT);
+
+ asm volatile(HFENCE_VVMA(zero, zero) : : : "memory");
+
+ csr_write(CSR_HGATP, hgatp);
+}
+
+void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
+{
+ unsigned long vmid;
+
+ if (!kvm_riscv_gstage_vmid_bits() ||
+ vcpu->arch.last_exit_cpu == vcpu->cpu)
+ return;
+
+ /*
+ * On RISC-V platforms with hardware VMID support, we share same
+ * VMID for all VCPUs of a particular Guest/VM. This means we might
+ * have stale G-stage TLB entries on the current Host CPU due to
+ * some other VCPU of the same Guest which ran previously on the
+ * current Host CPU.
+ *
+ * To cleanup stale TLB entries, we simply flush all G-stage TLB
+ * entries by VMID whenever underlying Host CPU changes for a VCPU.
+ */
+
+ vmid = READ_ONCE(vcpu->kvm->arch.vmid.vmid);
+ kvm_riscv_local_hfence_gvma_vmid_all(vmid);
+}
+
+void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
+{
+ local_flush_icache_all();
+}
+
+void kvm_riscv_hfence_gvma_vmid_all_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vmid *vmid;
+
+ vmid = &vcpu->kvm->arch.vmid;
+ kvm_riscv_local_hfence_gvma_vmid_all(READ_ONCE(vmid->vmid));
+}
+
+void kvm_riscv_hfence_vvma_all_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vmid *vmid;
+
+ vmid = &vcpu->kvm->arch.vmid;
+ kvm_riscv_local_hfence_vvma_all(READ_ONCE(vmid->vmid));
+}
+
+static bool vcpu_hfence_dequeue(struct kvm_vcpu *vcpu,
+ struct kvm_riscv_hfence *out_data)
+{
+ bool ret = false;
+ struct kvm_vcpu_arch *varch = &vcpu->arch;
+
+ spin_lock(&varch->hfence_lock);
+
+ if (varch->hfence_queue[varch->hfence_head].type) {
+ memcpy(out_data, &varch->hfence_queue[varch->hfence_head],
+ sizeof(*out_data));
+ varch->hfence_queue[varch->hfence_head].type = 0;
+
+ varch->hfence_head++;
+ if (varch->hfence_head == KVM_RISCV_VCPU_MAX_HFENCE)
+ varch->hfence_head = 0;
+
+ ret = true;
+ }
+
+ spin_unlock(&varch->hfence_lock);
+
+ return ret;
+}
+
+static bool vcpu_hfence_enqueue(struct kvm_vcpu *vcpu,
+ const struct kvm_riscv_hfence *data)
+{
+ bool ret = false;
+ struct kvm_vcpu_arch *varch = &vcpu->arch;
+
+ spin_lock(&varch->hfence_lock);
+
+ if (!varch->hfence_queue[varch->hfence_tail].type) {
+ memcpy(&varch->hfence_queue[varch->hfence_tail],
+ data, sizeof(*data));
+
+ varch->hfence_tail++;
+ if (varch->hfence_tail == KVM_RISCV_VCPU_MAX_HFENCE)
+ varch->hfence_tail = 0;
+
+ ret = true;
+ }
+
+ spin_unlock(&varch->hfence_lock);
+
+ return ret;
+}
+
+void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
+{
+ struct kvm_riscv_hfence d = { 0 };
+ struct kvm_vmid *v = &vcpu->kvm->arch.vmid;
+
+ while (vcpu_hfence_dequeue(vcpu, &d)) {
+ switch (d.type) {
+ case KVM_RISCV_HFENCE_UNKNOWN:
+ break;
+ case KVM_RISCV_HFENCE_GVMA_VMID_GPA:
+ kvm_riscv_local_hfence_gvma_vmid_gpa(
+ READ_ONCE(v->vmid),
+ d.addr, d.size, d.order);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
+ kvm_riscv_local_hfence_vvma_asid_gva(
+ READ_ONCE(v->vmid), d.asid,
+ d.addr, d.size, d.order);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
+ kvm_riscv_local_hfence_vvma_asid_all(
+ READ_ONCE(v->vmid), d.asid);
+ break;
+ case KVM_RISCV_HFENCE_VVMA_GVA:
+ kvm_riscv_local_hfence_vvma_gva(
+ READ_ONCE(v->vmid),
+ d.addr, d.size, d.order);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void make_xfence_request(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned int req, unsigned int fallback_req,
+ const struct kvm_riscv_hfence *data)
+{
+ unsigned long i;
+ struct kvm_vcpu *vcpu;
+ unsigned int actual_req = req;
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
+
+ bitmap_clear(vcpu_mask, 0, KVM_MAX_VCPUS);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (hbase != -1UL) {
+ if (vcpu->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (vcpu->vcpu_id - hbase))))
+ continue;
+ }
+
+ bitmap_set(vcpu_mask, i, 1);
+
+ if (!data || !data->type)
+ continue;
+
+ /*
+ * Enqueue hfence data to VCPU hfence queue. If we don't
+ * have space in the VCPU hfence queue then fallback to
+ * a more conservative hfence request.
+ */
+ if (!vcpu_hfence_enqueue(vcpu, data))
+ actual_req = fallback_req;
+ }
+
+ kvm_make_vcpus_request_mask(kvm, actual_req, vcpu_mask);
+}
+
+void kvm_riscv_fence_i(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I,
+ KVM_REQ_FENCE_I, NULL);
+}
+
+void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ gpa_t gpa, gpa_t gpsz,
+ unsigned long order)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_GVMA_VMID_GPA;
+ data.asid = 0;
+ data.addr = gpa;
+ data.size = gpsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_GVMA_VMID_ALL, &data);
+}
+
+void kvm_riscv_hfence_gvma_vmid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_GVMA_VMID_ALL,
+ KVM_REQ_HFENCE_GVMA_VMID_ALL, NULL);
+}
+
+void kvm_riscv_hfence_vvma_asid_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order, unsigned long asid)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_ASID_GVA;
+ data.asid = asid;
+ data.addr = gva;
+ data.size = gvsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_asid_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long asid)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_ASID_ALL;
+ data.asid = asid;
+ data.addr = data.size = data.order = 0;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_gva(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask,
+ unsigned long gva, unsigned long gvsz,
+ unsigned long order)
+{
+ struct kvm_riscv_hfence data;
+
+ data.type = KVM_RISCV_HFENCE_VVMA_GVA;
+ data.asid = 0;
+ data.addr = gva;
+ data.size = gvsz;
+ data.order = order;
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE,
+ KVM_REQ_HFENCE_VVMA_ALL, &data);
+}
+
+void kvm_riscv_hfence_vvma_all(struct kvm *kvm,
+ unsigned long hbase, unsigned long hmask)
+{
+ make_xfence_request(kvm, hbase, hmask, KVM_REQ_HFENCE_VVMA_ALL,
+ KVM_REQ_HFENCE_VVMA_ALL, NULL);
+}
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index fb84619df012..71ebbc4821f0 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -7,6 +7,7 @@
*/
#include <linux/bitops.h>
+#include <linux/entry-kvm.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kdebug.h>
@@ -18,6 +19,7 @@
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
+#include <asm/cacheflush.h>
#include <asm/hwcap.h>
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
@@ -26,6 +28,9 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
STATS_DESC_COUNTER(VCPU, mmio_exit_user),
STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+ STATS_DESC_COUNTER(VCPU, csr_exit_user),
+ STATS_DESC_COUNTER(VCPU, csr_exit_kernel),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
STATS_DESC_COUNTER(VCPU, exits)
};
@@ -38,14 +43,68 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
sizeof(kvm_vcpu_stats_desc),
};
-#define KVM_RISCV_ISA_ALLOWED (riscv_isa_extension_mask(a) | \
- riscv_isa_extension_mask(c) | \
- riscv_isa_extension_mask(d) | \
- riscv_isa_extension_mask(f) | \
- riscv_isa_extension_mask(i) | \
- riscv_isa_extension_mask(m) | \
- riscv_isa_extension_mask(s) | \
- riscv_isa_extension_mask(u))
+#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
+
+#define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
+
+/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
+static const unsigned long kvm_isa_ext_arr[] = {
+ [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
+ [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
+ [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
+ [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
+ [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
+ [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
+ [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+
+ KVM_ISA_EXT_ARR(SSTC),
+ KVM_ISA_EXT_ARR(SVINVAL),
+ KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+ KVM_ISA_EXT_ARR(ZICBOM),
+};
+
+static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
+{
+ unsigned long i;
+
+ for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+ if (kvm_isa_ext_arr[i] == base_ext)
+ return i;
+ }
+
+ return KVM_RISCV_ISA_EXT_MAX;
+}
+
+static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
+{
+ switch (ext) {
+ case KVM_RISCV_ISA_EXT_H:
+ return false;
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
+{
+ switch (ext) {
+ case KVM_RISCV_ISA_EXT_A:
+ case KVM_RISCV_ISA_EXT_C:
+ case KVM_RISCV_ISA_EXT_I:
+ case KVM_RISCV_ISA_EXT_M:
+ case KVM_RISCV_ISA_EXT_SSTC:
+ case KVM_RISCV_ISA_EXT_SVINVAL:
+ case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+ return false;
+ default:
+ break;
+ }
+
+ return true;
+}
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
@@ -53,6 +112,19 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
struct kvm_cpu_context *reset_cntx = &vcpu->arch.guest_reset_context;
+ bool loaded;
+
+ /**
+ * The preemption should be disabled here because it races with
+ * kvm_sched_out/kvm_sched_in(called from preempt notifiers) which
+ * also calls vcpu_load/put.
+ */
+ get_cpu();
+ loaded = (vcpu->cpu != -1);
+ if (loaded)
+ kvm_arch_vcpu_put(vcpu);
+
+ vcpu->arch.last_exit_cpu = -1;
memcpy(csr, reset_csr, sizeof(*csr));
@@ -64,6 +136,15 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
WRITE_ONCE(vcpu->arch.irqs_pending, 0);
WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0);
+
+ vcpu->arch.hfence_head = 0;
+ vcpu->arch.hfence_tail = 0;
+ memset(vcpu->arch.hfence_queue, 0, sizeof(vcpu->arch.hfence_queue));
+
+ /* Reset the guest CSRs for hotplug usecase */
+ if (loaded)
+ kvm_arch_vcpu_load(vcpu, smp_processor_id());
+ put_cpu();
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -74,12 +155,24 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *cntx;
+ struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
+ unsigned long host_isa, i;
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
+ vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
+ bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
/* Setup ISA features available to VCPU */
- vcpu->arch.isa = riscv_isa_extension_base(NULL) & KVM_RISCV_ISA_ALLOWED;
+ for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
+ host_isa = kvm_isa_ext_arr[i];
+ if (__riscv_isa_extension_available(NULL, host_isa) &&
+ kvm_riscv_vcpu_isa_enable_allowed(i))
+ set_bit(host_isa, vcpu->arch.isa);
+ }
+
+ /* Setup VCPU hfence queue */
+ spin_lock_init(&vcpu->arch.hfence_lock);
/* Setup reset state of shadow SSTATUS and HSTATUS CSRs */
cntx = &vcpu->arch.guest_reset_context;
@@ -89,6 +182,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
cntx->hstatus |= HSTATUS_SPVP;
cntx->hstatus |= HSTATUS_SPV;
+ /* By default, make CY, TM, and IR counters accessible in VU mode */
+ reset_csr->scounteren = 0x7;
+
/* Setup VCPU timer */
kvm_riscv_vcpu_timer_init(vcpu);
@@ -100,6 +196,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
+ /**
+ * vcpu with id 0 is the designated boot cpu.
+ * Keep all vcpus with non-zero id in power-off state so that
+ * they can be brought up using SBI HSM extension.
+ */
+ if (vcpu->vcpu_idx != 0)
+ kvm_riscv_vcpu_power_off(vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -107,13 +210,13 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
/* Cleanup VCPU timer */
kvm_riscv_vcpu_timer_deinit(vcpu);
- /* Flush the pages pre-allocated for Stage2 page table mappings */
- kvm_riscv_stage2_flush_cache(vcpu);
+ /* Free unused pages pre-allocated for G-stage page table mappings */
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER);
+ return kvm_riscv_vcpu_timer_pending(vcpu);
}
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
@@ -160,7 +263,12 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
switch (reg_num) {
case KVM_REG_RISCV_CONFIG_REG(isa):
- reg_val = vcpu->arch.isa;
+ reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+ if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+ return -EINVAL;
+ reg_val = riscv_cbom_block_size;
break;
default:
return -EINVAL;
@@ -180,7 +288,7 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
KVM_REG_SIZE_MASK |
KVM_REG_RISCV_CONFIG);
- unsigned long reg_val;
+ unsigned long i, isa_ext, reg_val;
if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
return -EINVAL;
@@ -188,17 +296,39 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
+ /* This ONE REG interface is only defined for single letter extensions */
+ if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
+ return -EINVAL;
+
switch (reg_num) {
case KVM_REG_RISCV_CONFIG_REG(isa):
if (!vcpu->arch.ran_atleast_once) {
- vcpu->arch.isa = reg_val;
- vcpu->arch.isa &= riscv_isa_extension_base(NULL);
- vcpu->arch.isa &= KVM_RISCV_ISA_ALLOWED;
+ /* Ignore the enable/disable request for certain extensions */
+ for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
+ isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
+ if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
+ reg_val &= ~BIT(i);
+ continue;
+ }
+ if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
+ if (reg_val & BIT(i))
+ reg_val &= ~BIT(i);
+ if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
+ if (!(reg_val & BIT(i)))
+ reg_val |= BIT(i);
+ }
+ reg_val &= riscv_isa_extension_base(NULL);
+ /* Do not modify anything beyond single letter extensions */
+ reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
+ (reg_val & KVM_RISCV_BASE_ISA_MASK);
+ vcpu->arch.isa[0] = reg_val;
kvm_riscv_vcpu_fp_reset(vcpu);
} else {
return -EOPNOTSUPP;
}
break;
+ case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+ return -EOPNOTSUPP;
default:
return -EINVAL;
}
@@ -334,6 +464,80 @@ static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
return 0;
}
+static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val = 0;
+ unsigned long host_isa_ext;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+ reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -EINVAL;
+
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
+ reg_val = 1; /* Mark the given extension as available */
+
+ if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val;
+ unsigned long host_isa_ext;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+ reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -EINVAL;
+
+ if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (!__riscv_isa_extension_available(NULL, host_isa_ext))
+ return -EOPNOTSUPP;
+
+ if (!vcpu->arch.ran_atleast_once) {
+ /*
+ * All multi-letter extension and a few single letter
+ * extension can be disabled
+ */
+ if (reg_val == 1 &&
+ kvm_riscv_vcpu_isa_enable_allowed(reg_num))
+ set_bit(host_isa_ext, vcpu->arch.isa);
+ else if (!reg_val &&
+ kvm_riscv_vcpu_isa_disable_allowed(reg_num))
+ clear_bit(host_isa_ext, vcpu->arch.isa);
+ else
+ return -EINVAL;
+ kvm_riscv_vcpu_fp_reset(vcpu);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
@@ -351,6 +555,8 @@ static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
+ return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
return -EINVAL;
}
@@ -372,6 +578,8 @@ static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D)
return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
KVM_REG_RISCV_FP_D);
+ else if ((reg->id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT)
+ return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
return -EINVAL;
}
@@ -500,6 +708,9 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu)
clear_bit(IRQ_VS_SOFT, &v->irqs_pending);
}
}
+
+ /* Sync-up timer CSRs */
+ kvm_riscv_vcpu_timer_sync(vcpu);
}
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
@@ -590,6 +801,25 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
return -EINVAL;
}
+static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
+{
+ u64 henvcfg = 0;
+
+ if (riscv_isa_extension_available(isa, SVPBMT))
+ henvcfg |= ENVCFG_PBMTE;
+
+ if (riscv_isa_extension_available(isa, SSTC))
+ henvcfg |= ENVCFG_STCE;
+
+ if (riscv_isa_extension_available(isa, ZICBOM))
+ henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);
+
+ csr_write(CSR_HENVCFG, henvcfg);
+#ifdef CONFIG_32BIT
+ csr_write(CSR_HENVCFGH, henvcfg >> 32);
+#endif
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
@@ -604,7 +834,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
csr_write(CSR_HVIP, csr->hvip);
csr_write(CSR_VSATP, csr->vsatp);
- kvm_riscv_stage2_update_hgatp(vcpu);
+ kvm_riscv_vcpu_update_config(vcpu->arch.isa);
+
+ kvm_riscv_gstage_update_hgatp(vcpu);
kvm_riscv_vcpu_timer_restore(vcpu);
@@ -625,7 +857,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->arch.isa);
kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
- csr_write(CSR_HGATP, 0);
+ kvm_riscv_vcpu_timer_save(vcpu);
csr->vsstatus = csr_read(CSR_VSSTATUS);
csr->vsie = csr_read(CSR_VSIE);
@@ -644,9 +876,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait,
(!vcpu->arch.power_off) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (vcpu->arch.power_off || vcpu->arch.pause) {
/*
@@ -661,10 +895,23 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu)
kvm_riscv_reset_vcpu(vcpu);
if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu))
- kvm_riscv_stage2_update_hgatp(vcpu);
+ kvm_riscv_gstage_update_hgatp(vcpu);
- if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- __kvm_riscv_hfence_gvma_all();
+ if (kvm_check_request(KVM_REQ_FENCE_I, vcpu))
+ kvm_riscv_fence_i_process(vcpu);
+
+ /*
+ * The generic KVM_REQ_TLB_FLUSH is same as
+ * KVM_REQ_HFENCE_GVMA_VMID_ALL
+ */
+ if (kvm_check_request(KVM_REQ_HFENCE_GVMA_VMID_ALL, vcpu))
+ kvm_riscv_hfence_gvma_vmid_all_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_HFENCE_VVMA_ALL, vcpu))
+ kvm_riscv_hfence_vvma_all_process(vcpu);
+
+ if (kvm_check_request(KVM_REQ_HFENCE, vcpu))
+ kvm_riscv_hfence_process(vcpu);
}
}
@@ -675,6 +922,21 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu)
csr_write(CSR_HVIP, csr->hvip);
}
+/*
+ * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while
+ * the vCPU is running.
+ *
+ * This must be noinstr as instrumentation may make use of RCU, and this is not
+ * safe during the EQS.
+ */
+static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+{
+ guest_state_enter_irqoff();
+ __kvm_riscv_switch_to(&vcpu->arch);
+ vcpu->arch.last_exit_cpu = vcpu->cpu;
+ guest_state_exit_irqoff();
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{
int ret;
@@ -684,28 +946,32 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Mark this VCPU ran at least once */
vcpu->arch.ran_atleast_once = true;
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
- /* Process MMIO value returned from user-space */
- if (run->exit_reason == KVM_EXIT_MMIO) {
+ switch (run->exit_reason) {
+ case KVM_EXIT_MMIO:
+ /* Process MMIO value returned from user-space */
ret = kvm_riscv_vcpu_mmio_return(vcpu, vcpu->run);
- if (ret) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
- return ret;
- }
- }
-
- /* Process SBI value returned from user-space */
- if (run->exit_reason == KVM_EXIT_RISCV_SBI) {
+ break;
+ case KVM_EXIT_RISCV_SBI:
+ /* Process SBI value returned from user-space */
ret = kvm_riscv_vcpu_sbi_return(vcpu, vcpu->run);
- if (ret) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
- return ret;
- }
+ break;
+ case KVM_EXIT_RISCV_CSR:
+ /* Process CSR value returned from user-space */
+ ret = kvm_riscv_vcpu_csr_return(vcpu, vcpu->run);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ if (ret) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ return ret;
}
if (run->immediate_exit) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return -EINTR;
}
@@ -717,26 +983,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
run->exit_reason = KVM_EXIT_UNKNOWN;
while (ret > 0) {
/* Check conditions before entering the guest */
- cond_resched();
+ ret = xfer_to_guest_mode_handle_work(vcpu);
+ if (!ret)
+ ret = 1;
- kvm_riscv_stage2_vmid_update(vcpu);
+ kvm_riscv_gstage_vmid_update(vcpu);
kvm_riscv_check_vcpu_requests(vcpu);
- preempt_disable();
-
local_irq_disable();
/*
- * Exit if we have a signal pending so that we can deliver
- * the signal to user space.
- */
- if (signal_pending(current)) {
- ret = -EINTR;
- run->exit_reason = KVM_EXIT_INTR;
- }
-
- /*
* Ensure we set mode to IN_GUEST_MODE after we disable
* interrupts and before the final VCPU requests check.
* See the comment in kvm_vcpu_exiting_guest_mode() and
@@ -744,7 +1001,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
vcpu->mode = IN_GUEST_MODE;
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
smp_mb__after_srcu_read_unlock();
/*
@@ -757,18 +1014,26 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_riscv_update_hvip(vcpu);
if (ret <= 0 ||
- kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
- kvm_request_pending(vcpu)) {
+ kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) ||
+ kvm_request_pending(vcpu) ||
+ xfer_to_guest_mode_work_pending()) {
vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
- preempt_enable();
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
continue;
}
- guest_enter_irqoff();
+ /*
+ * Cleanup stale TLB enteries
+ *
+ * Note: This should be done after G-stage VMID has been
+ * updated using kvm_riscv_gstage_vmid_ver_changed()
+ */
+ kvm_riscv_local_tlb_sanitize(vcpu);
+
+ guest_timing_enter_irqoff();
- __kvm_riscv_switch_to(&vcpu->arch);
+ kvm_riscv_vcpu_enter_exit(vcpu);
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
@@ -787,30 +1052,28 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
/* Syncup interrupts state with HW */
kvm_riscv_vcpu_sync_interrupts(vcpu);
+ preempt_disable();
+
/*
- * We may have taken a host interrupt in VS/VU-mode (i.e.
- * while executing the guest). This interrupt is still
- * pending, as we haven't serviced it yet!
+ * We must ensure that any pending interrupts are taken before
+ * we exit guest timing so that timer ticks are accounted as
+ * guest time. Transiently unmask interrupts so that any
+ * pending interrupts are taken.
*
- * We're now back in HS-mode with interrupts disabled
- * so enabling the interrupts now will have the effect
- * of taking the interrupt again, in HS-mode this time.
+ * There's no barrier which ensures that pending interrupts are
+ * recognised, so we just hope that the CPU takes any pending
+ * interrupts between the enable and disable.
*/
local_irq_enable();
+ local_irq_disable();
- /*
- * We do local_irq_enable() before calling guest_exit() so
- * that if a timer interrupt hits while running the guest
- * we account that tick as being spent in the guest. We
- * enable preemption after calling guest_exit() so that if
- * we get preempted we make sure ticks after that is not
- * counted as guest time.
- */
- guest_exit();
+ guest_timing_exit_irqoff();
+
+ local_irq_enable();
preempt_enable();
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ kvm_vcpu_srcu_read_lock(vcpu);
ret = kvm_riscv_vcpu_exit(vcpu, run, &trap);
}
@@ -819,7 +1082,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
vcpu_put(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
+ kvm_vcpu_srcu_read_unlock(vcpu);
return ret;
}
diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c
index 7f2d742ae4c6..c9f741ab26f5 100644
--- a/arch/riscv/kvm/vcpu_exit.c
+++ b/arch/riscv/kvm/vcpu_exit.c
@@ -6,446 +6,41 @@
* Anup Patel <anup.patel@wdc.com>
*/
-#include <linux/bitops.h>
-#include <linux/errno.h>
-#include <linux/err.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
+#include <asm/insn-def.h>
-#define INSN_OPCODE_MASK 0x007c
-#define INSN_OPCODE_SHIFT 2
-#define INSN_OPCODE_SYSTEM 28
-
-#define INSN_MASK_WFI 0xffffffff
-#define INSN_MATCH_WFI 0x10500073
-
-#define INSN_MATCH_LB 0x3
-#define INSN_MASK_LB 0x707f
-#define INSN_MATCH_LH 0x1003
-#define INSN_MASK_LH 0x707f
-#define INSN_MATCH_LW 0x2003
-#define INSN_MASK_LW 0x707f
-#define INSN_MATCH_LD 0x3003
-#define INSN_MASK_LD 0x707f
-#define INSN_MATCH_LBU 0x4003
-#define INSN_MASK_LBU 0x707f
-#define INSN_MATCH_LHU 0x5003
-#define INSN_MASK_LHU 0x707f
-#define INSN_MATCH_LWU 0x6003
-#define INSN_MASK_LWU 0x707f
-#define INSN_MATCH_SB 0x23
-#define INSN_MASK_SB 0x707f
-#define INSN_MATCH_SH 0x1023
-#define INSN_MASK_SH 0x707f
-#define INSN_MATCH_SW 0x2023
-#define INSN_MASK_SW 0x707f
-#define INSN_MATCH_SD 0x3023
-#define INSN_MASK_SD 0x707f
-
-#define INSN_MATCH_C_LD 0x6000
-#define INSN_MASK_C_LD 0xe003
-#define INSN_MATCH_C_SD 0xe000
-#define INSN_MASK_C_SD 0xe003
-#define INSN_MATCH_C_LW 0x4000
-#define INSN_MASK_C_LW 0xe003
-#define INSN_MATCH_C_SW 0xc000
-#define INSN_MASK_C_SW 0xe003
-#define INSN_MATCH_C_LDSP 0x6002
-#define INSN_MASK_C_LDSP 0xe003
-#define INSN_MATCH_C_SDSP 0xe002
-#define INSN_MASK_C_SDSP 0xe003
-#define INSN_MATCH_C_LWSP 0x4002
-#define INSN_MASK_C_LWSP 0xe003
-#define INSN_MATCH_C_SWSP 0xc002
-#define INSN_MASK_C_SWSP 0xe003
-
-#define INSN_16BIT_MASK 0x3
-
-#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK)
-
-#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4)
-
-#ifdef CONFIG_64BIT
-#define LOG_REGBYTES 3
-#else
-#define LOG_REGBYTES 2
-#endif
-#define REGBYTES (1 << LOG_REGBYTES)
-
-#define SH_RD 7
-#define SH_RS1 15
-#define SH_RS2 20
-#define SH_RS2C 2
-
-#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
-#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \
- (RV_X(x, 10, 3) << 3) | \
- (RV_X(x, 5, 1) << 6))
-#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \
- (RV_X(x, 5, 2) << 6))
-#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \
- (RV_X(x, 12, 1) << 5) | \
- (RV_X(x, 2, 2) << 6))
-#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \
- (RV_X(x, 12, 1) << 5) | \
- (RV_X(x, 2, 3) << 6))
-#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \
- (RV_X(x, 7, 2) << 6))
-#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \
- (RV_X(x, 7, 3) << 6))
-#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3))
-#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3))
-#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5)
-
-#define SHIFT_RIGHT(x, y) \
- ((y) < 0 ? ((x) << -(y)) : ((x) >> (y)))
-
-#define REG_MASK \
- ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES))
-
-#define REG_OFFSET(insn, pos) \
- (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK)
-
-#define REG_PTR(insn, pos, regs) \
- ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)))
-
-#define GET_RM(insn) (((insn) >> 12) & 7)
-
-#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
-#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
-#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
-#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs))
-#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs))
-#define GET_SP(regs) (*REG_PTR(2, 0, regs))
-#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val))
-#define IMM_I(insn) ((s32)(insn) >> 20)
-#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \
- (s32)(((insn) >> 7) & 0x1f))
-#define MASK_FUNCT3 0x7000
-
-static int truly_illegal_insn(struct kvm_vcpu *vcpu,
- struct kvm_run *run,
- ulong insn)
-{
- struct kvm_cpu_trap utrap = { 0 };
-
- /* Redirect trap to Guest VCPU */
- utrap.sepc = vcpu->arch.guest_context.sepc;
- utrap.scause = EXC_INST_ILLEGAL;
- utrap.stval = insn;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
-
- return 1;
-}
-
-static int system_opcode_insn(struct kvm_vcpu *vcpu,
- struct kvm_run *run,
- ulong insn)
-{
- if ((insn & INSN_MASK_WFI) == INSN_MATCH_WFI) {
- vcpu->stat.wfi_exit_stat++;
- if (!kvm_arch_vcpu_runnable(vcpu)) {
- srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx);
- kvm_vcpu_block(vcpu);
- vcpu->arch.srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- kvm_clear_request(KVM_REQ_UNHALT, vcpu);
- }
- vcpu->arch.guest_context.sepc += INSN_LEN(insn);
- return 1;
- }
-
- return truly_illegal_insn(vcpu, run, insn);
-}
-
-static int virtual_inst_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
- struct kvm_cpu_trap *trap)
-{
- unsigned long insn = trap->stval;
- struct kvm_cpu_trap utrap = { 0 };
- struct kvm_cpu_context *ct;
-
- if (unlikely(INSN_IS_16BIT(insn))) {
- if (insn == 0) {
- ct = &vcpu->arch.guest_context;
- insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
- ct->sepc,
- &utrap);
- if (utrap.scause) {
- utrap.sepc = ct->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- return 1;
- }
- }
- if (INSN_IS_16BIT(insn))
- return truly_illegal_insn(vcpu, run, insn);
- }
-
- switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
- case INSN_OPCODE_SYSTEM:
- return system_opcode_insn(vcpu, run, insn);
- default:
- return truly_illegal_insn(vcpu, run, insn);
- }
-}
-
-static int emulate_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
- unsigned long fault_addr, unsigned long htinst)
-{
- u8 data_buf[8];
- unsigned long insn;
- int shift = 0, len = 0, insn_len = 0;
- struct kvm_cpu_trap utrap = { 0 };
- struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
-
- /* Determine trapped instruction */
- if (htinst & 0x1) {
- /*
- * Bit[0] == 1 implies trapped instruction value is
- * transformed instruction or custom instruction.
- */
- insn = htinst | INSN_16BIT_MASK;
- insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
- } else {
- /*
- * Bit[0] == 0 implies trapped instruction value is
- * zero or special value.
- */
- insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
- &utrap);
- if (utrap.scause) {
- /* Redirect trap if we failed to read instruction */
- utrap.sepc = ct->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- return 1;
- }
- insn_len = INSN_LEN(insn);
- }
-
- /* Decode length of MMIO and shift */
- if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
- len = 4;
- shift = 8 * (sizeof(ulong) - len);
- } else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) {
- len = 1;
- shift = 8 * (sizeof(ulong) - len);
- } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
- len = 1;
- shift = 8 * (sizeof(ulong) - len);
-#ifdef CONFIG_64BIT
- } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
- len = 8;
- shift = 8 * (sizeof(ulong) - len);
- } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) {
- len = 4;
-#endif
- } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) {
- len = 2;
- shift = 8 * (sizeof(ulong) - len);
- } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) {
- len = 2;
-#ifdef CONFIG_64BIT
- } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) {
- len = 8;
- shift = 8 * (sizeof(ulong) - len);
- insn = RVC_RS2S(insn) << SH_RD;
- } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP &&
- ((insn >> SH_RD) & 0x1f)) {
- len = 8;
- shift = 8 * (sizeof(ulong) - len);
-#endif
- } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) {
- len = 4;
- shift = 8 * (sizeof(ulong) - len);
- insn = RVC_RS2S(insn) << SH_RD;
- } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP &&
- ((insn >> SH_RD) & 0x1f)) {
- len = 4;
- shift = 8 * (sizeof(ulong) - len);
- } else {
- return -EOPNOTSUPP;
- }
-
- /* Fault address should be aligned to length of MMIO */
- if (fault_addr & (len - 1))
- return -EIO;
-
- /* Save instruction decode info */
- vcpu->arch.mmio_decode.insn = insn;
- vcpu->arch.mmio_decode.insn_len = insn_len;
- vcpu->arch.mmio_decode.shift = shift;
- vcpu->arch.mmio_decode.len = len;
- vcpu->arch.mmio_decode.return_handled = 0;
-
- /* Update MMIO details in kvm_run struct */
- run->mmio.is_write = false;
- run->mmio.phys_addr = fault_addr;
- run->mmio.len = len;
-
- /* Try to handle MMIO access in the kernel */
- if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) {
- /* Successfully handled MMIO access in the kernel so resume */
- memcpy(run->mmio.data, data_buf, len);
- vcpu->stat.mmio_exit_kernel++;
- kvm_riscv_vcpu_mmio_return(vcpu, run);
- return 1;
- }
-
- /* Exit to userspace for MMIO emulation */
- vcpu->stat.mmio_exit_user++;
- run->exit_reason = KVM_EXIT_MMIO;
-
- return 0;
-}
-
-static int emulate_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
- unsigned long fault_addr, unsigned long htinst)
-{
- u8 data8;
- u16 data16;
- u32 data32;
- u64 data64;
- ulong data;
- unsigned long insn;
- int len = 0, insn_len = 0;
- struct kvm_cpu_trap utrap = { 0 };
- struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
-
- /* Determine trapped instruction */
- if (htinst & 0x1) {
- /*
- * Bit[0] == 1 implies trapped instruction value is
- * transformed instruction or custom instruction.
- */
- insn = htinst | INSN_16BIT_MASK;
- insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
- } else {
- /*
- * Bit[0] == 0 implies trapped instruction value is
- * zero or special value.
- */
- insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
- &utrap);
- if (utrap.scause) {
- /* Redirect trap if we failed to read instruction */
- utrap.sepc = ct->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- return 1;
- }
- insn_len = INSN_LEN(insn);
- }
-
- data = GET_RS2(insn, &vcpu->arch.guest_context);
- data8 = data16 = data32 = data64 = data;
-
- if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) {
- len = 4;
- } else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) {
- len = 1;
-#ifdef CONFIG_64BIT
- } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
- len = 8;
-#endif
- } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
- len = 2;
-#ifdef CONFIG_64BIT
- } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
- len = 8;
- data64 = GET_RS2S(insn, &vcpu->arch.guest_context);
- } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
- ((insn >> SH_RD) & 0x1f)) {
- len = 8;
- data64 = GET_RS2C(insn, &vcpu->arch.guest_context);
-#endif
- } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
- len = 4;
- data32 = GET_RS2S(insn, &vcpu->arch.guest_context);
- } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
- ((insn >> SH_RD) & 0x1f)) {
- len = 4;
- data32 = GET_RS2C(insn, &vcpu->arch.guest_context);
- } else {
- return -EOPNOTSUPP;
- }
-
- /* Fault address should be aligned to length of MMIO */
- if (fault_addr & (len - 1))
- return -EIO;
-
- /* Save instruction decode info */
- vcpu->arch.mmio_decode.insn = insn;
- vcpu->arch.mmio_decode.insn_len = insn_len;
- vcpu->arch.mmio_decode.shift = 0;
- vcpu->arch.mmio_decode.len = len;
- vcpu->arch.mmio_decode.return_handled = 0;
-
- /* Copy data to kvm_run instance */
- switch (len) {
- case 1:
- *((u8 *)run->mmio.data) = data8;
- break;
- case 2:
- *((u16 *)run->mmio.data) = data16;
- break;
- case 4:
- *((u32 *)run->mmio.data) = data32;
- break;
- case 8:
- *((u64 *)run->mmio.data) = data64;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- /* Update MMIO details in kvm_run struct */
- run->mmio.is_write = true;
- run->mmio.phys_addr = fault_addr;
- run->mmio.len = len;
-
- /* Try to handle MMIO access in the kernel */
- if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
- fault_addr, len, run->mmio.data)) {
- /* Successfully handled MMIO access in the kernel so resume */
- vcpu->stat.mmio_exit_kernel++;
- kvm_riscv_vcpu_mmio_return(vcpu, run);
- return 1;
- }
-
- /* Exit to userspace for MMIO emulation */
- vcpu->stat.mmio_exit_user++;
- run->exit_reason = KVM_EXIT_MMIO;
-
- return 0;
-}
-
-static int stage2_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
+static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_cpu_trap *trap)
{
struct kvm_memory_slot *memslot;
unsigned long hva, fault_addr;
- bool writeable;
+ bool writable;
gfn_t gfn;
int ret;
fault_addr = (trap->htval << 2) | (trap->stval & 0x3);
gfn = fault_addr >> PAGE_SHIFT;
memslot = gfn_to_memslot(vcpu->kvm, gfn);
- hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
if (kvm_is_error_hva(hva) ||
- (trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writeable)) {
+ (trap->scause == EXC_STORE_GUEST_PAGE_FAULT && !writable)) {
switch (trap->scause) {
case EXC_LOAD_GUEST_PAGE_FAULT:
- return emulate_load(vcpu, run, fault_addr,
- trap->htinst);
+ return kvm_riscv_vcpu_mmio_load(vcpu, run,
+ fault_addr,
+ trap->htinst);
case EXC_STORE_GUEST_PAGE_FAULT:
- return emulate_store(vcpu, run, fault_addr,
- trap->htinst);
+ return kvm_riscv_vcpu_mmio_store(vcpu, run,
+ fault_addr,
+ trap->htinst);
default:
return -EOPNOTSUPP;
};
}
- ret = kvm_riscv_stage2_map(vcpu, memslot, fault_addr, hva,
+ ret = kvm_riscv_gstage_map(vcpu, memslot, fault_addr, hva,
(trap->scause == EXC_STORE_GUEST_PAGE_FAULT) ? true : false);
if (ret < 0)
return ret;
@@ -468,11 +63,7 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
{
register unsigned long taddr asm("a0") = (unsigned long)trap;
register unsigned long ttmp asm("a1");
- register unsigned long val asm("t0");
- register unsigned long tmp asm("t1");
- register unsigned long addr asm("t2") = guest_addr;
- unsigned long flags;
- unsigned long old_stvec, old_hstatus;
+ unsigned long flags, val, tmp, old_stvec, old_hstatus;
local_irq_save(flags);
@@ -488,29 +79,19 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
".option push\n"
".option norvc\n"
"add %[ttmp], %[taddr], 0\n"
- /*
- * HLVX.HU %[val], (%[addr])
- * HLVX.HU t0, (t2)
- * 0110010 00011 00111 100 00101 1110011
- */
- ".word 0x6433c2f3\n"
+ HLVX_HU(%[val], %[addr])
"andi %[tmp], %[val], 3\n"
"addi %[tmp], %[tmp], -3\n"
"bne %[tmp], zero, 2f\n"
"addi %[addr], %[addr], 2\n"
- /*
- * HLVX.HU %[tmp], (%[addr])
- * HLVX.HU t1, (t2)
- * 0110010 00011 00111 100 00110 1110011
- */
- ".word 0x6433c373\n"
+ HLVX_HU(%[tmp], %[addr])
"sll %[tmp], %[tmp], 16\n"
"add %[val], %[val], %[tmp]\n"
"2:\n"
".option pop"
: [val] "=&r" (val), [tmp] "=&r" (tmp),
[taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp),
- [addr] "+&r" (addr) : : "memory");
+ [addr] "+&r" (guest_addr) : : "memory");
if (trap->scause == EXC_LOAD_PAGE_FAULT)
trap->scause = EXC_INST_PAGE_FAULT;
@@ -527,24 +108,14 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu,
".option norvc\n"
"add %[ttmp], %[taddr], 0\n"
#ifdef CONFIG_64BIT
- /*
- * HLV.D %[val], (%[addr])
- * HLV.D t0, (t2)
- * 0110110 00000 00111 100 00101 1110011
- */
- ".word 0x6c03c2f3\n"
+ HLV_D(%[val], %[addr])
#else
- /*
- * HLV.W %[val], (%[addr])
- * HLV.W t0, (t2)
- * 0110100 00000 00111 100 00101 1110011
- */
- ".word 0x6803c2f3\n"
+ HLV_W(%[val], %[addr])
#endif
".option pop"
: [val] "=&r" (val),
[taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp)
- : [addr] "r" (addr) : "memory");
+ : [addr] "r" (guest_addr) : "memory");
}
csr_write(CSR_STVEC, old_stvec);
@@ -591,66 +162,6 @@ void kvm_riscv_vcpu_trap_redirect(struct kvm_vcpu *vcpu,
vcpu->arch.guest_context.sepc = csr_read(CSR_VSTVEC);
}
-/**
- * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation
- * or in-kernel IO emulation
- *
- * @vcpu: The VCPU pointer
- * @run: The VCPU run struct containing the mmio data
- */
-int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- u8 data8;
- u16 data16;
- u32 data32;
- u64 data64;
- ulong insn;
- int len, shift;
-
- if (vcpu->arch.mmio_decode.return_handled)
- return 0;
-
- vcpu->arch.mmio_decode.return_handled = 1;
- insn = vcpu->arch.mmio_decode.insn;
-
- if (run->mmio.is_write)
- goto done;
-
- len = vcpu->arch.mmio_decode.len;
- shift = vcpu->arch.mmio_decode.shift;
-
- switch (len) {
- case 1:
- data8 = *((u8 *)run->mmio.data);
- SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data8 << shift >> shift);
- break;
- case 2:
- data16 = *((u16 *)run->mmio.data);
- SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data16 << shift >> shift);
- break;
- case 4:
- data32 = *((u32 *)run->mmio.data);
- SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data32 << shift >> shift);
- break;
- case 8:
- data64 = *((u64 *)run->mmio.data);
- SET_RD(insn, &vcpu->arch.guest_context,
- (ulong)data64 << shift >> shift);
- break;
- default:
- return -EOPNOTSUPP;
- }
-
-done:
- /* Move to next instruction */
- vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len;
-
- return 0;
-}
-
/*
* Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on
* proper exit to userspace.
@@ -670,13 +181,13 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
switch (trap->scause) {
case EXC_VIRTUAL_INST_FAULT:
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
- ret = virtual_inst_fault(vcpu, run, trap);
+ ret = kvm_riscv_vcpu_virtual_insn(vcpu, run, trap);
break;
case EXC_INST_GUEST_PAGE_FAULT:
case EXC_LOAD_GUEST_PAGE_FAULT:
case EXC_STORE_GUEST_PAGE_FAULT:
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
- ret = stage2_page_fault(vcpu, run, trap);
+ ret = gstage_page_fault(vcpu, run, trap);
break;
case EXC_SUPERVISOR_SYSCALL:
if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV)
diff --git a/arch/riscv/kvm/vcpu_fp.c b/arch/riscv/kvm/vcpu_fp.c
index 1b070152578f..9d8cbc42057a 100644
--- a/arch/riscv/kvm/vcpu_fp.c
+++ b/arch/riscv/kvm/vcpu_fp.c
@@ -11,46 +11,46 @@
#include <linux/err.h>
#include <linux/kvm_host.h>
#include <linux/uaccess.h>
+#include <asm/hwcap.h>
#ifdef CONFIG_FPU
void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu)
{
- unsigned long isa = vcpu->arch.isa;
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
cntx->sstatus &= ~SR_FS;
- if (riscv_isa_extension_available(&isa, f) ||
- riscv_isa_extension_available(&isa, d))
+ if (riscv_isa_extension_available(vcpu->arch.isa, f) ||
+ riscv_isa_extension_available(vcpu->arch.isa, d))
cntx->sstatus |= SR_FS_INITIAL;
else
cntx->sstatus |= SR_FS_OFF;
}
-void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
+static void kvm_riscv_vcpu_fp_clean(struct kvm_cpu_context *cntx)
{
cntx->sstatus &= ~SR_FS;
cntx->sstatus |= SR_FS_CLEAN;
}
void kvm_riscv_vcpu_guest_fp_save(struct kvm_cpu_context *cntx,
- unsigned long isa)
+ const unsigned long *isa)
{
if ((cntx->sstatus & SR_FS) == SR_FS_DIRTY) {
- if (riscv_isa_extension_available(&isa, d))
+ if (riscv_isa_extension_available(isa, d))
__kvm_riscv_fp_d_save(cntx);
- else if (riscv_isa_extension_available(&isa, f))
+ else if (riscv_isa_extension_available(isa, f))
__kvm_riscv_fp_f_save(cntx);
kvm_riscv_vcpu_fp_clean(cntx);
}
}
void kvm_riscv_vcpu_guest_fp_restore(struct kvm_cpu_context *cntx,
- unsigned long isa)
+ const unsigned long *isa)
{
if ((cntx->sstatus & SR_FS) != SR_FS_OFF) {
- if (riscv_isa_extension_available(&isa, d))
+ if (riscv_isa_extension_available(isa, d))
__kvm_riscv_fp_d_restore(cntx);
- else if (riscv_isa_extension_available(&isa, f))
+ else if (riscv_isa_extension_available(isa, f))
__kvm_riscv_fp_f_restore(cntx);
kvm_riscv_vcpu_fp_clean(cntx);
}
@@ -79,7 +79,6 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
unsigned long rtype)
{
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
- unsigned long isa = vcpu->arch.isa;
unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
@@ -88,7 +87,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
void *reg_val;
if ((rtype == KVM_REG_RISCV_FP_F) &&
- riscv_isa_extension_available(&isa, f)) {
+ riscv_isa_extension_available(vcpu->arch.isa, f)) {
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
return -EINVAL;
if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
@@ -99,7 +98,7 @@ int kvm_riscv_vcpu_get_reg_fp(struct kvm_vcpu *vcpu,
else
return -EINVAL;
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
- riscv_isa_extension_available(&isa, d)) {
+ riscv_isa_extension_available(vcpu->arch.isa, d)) {
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
return -EINVAL;
@@ -125,7 +124,6 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
unsigned long rtype)
{
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
- unsigned long isa = vcpu->arch.isa;
unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
@@ -134,7 +132,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
void *reg_val;
if ((rtype == KVM_REG_RISCV_FP_F) &&
- riscv_isa_extension_available(&isa, f)) {
+ riscv_isa_extension_available(vcpu->arch.isa, f)) {
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
return -EINVAL;
if (reg_num == KVM_REG_RISCV_FP_F_REG(fcsr))
@@ -145,7 +143,7 @@ int kvm_riscv_vcpu_set_reg_fp(struct kvm_vcpu *vcpu,
else
return -EINVAL;
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
- riscv_isa_extension_available(&isa, d)) {
+ riscv_isa_extension_available(vcpu->arch.isa, d)) {
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
if (KVM_REG_SIZE(reg->id) != sizeof(u32))
return -EINVAL;
diff --git a/arch/riscv/kvm/vcpu_insn.c b/arch/riscv/kvm/vcpu_insn.c
new file mode 100644
index 000000000000..0bb52761a3f7
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_insn.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (c) 2022 Ventana Micro Systems Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/kvm_host.h>
+
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_WFI 0xffffffff
+#define INSN_MATCH_WFI 0x10500073
+
+#define INSN_MATCH_CSRRW 0x1073
+#define INSN_MASK_CSRRW 0x707f
+#define INSN_MATCH_CSRRS 0x2073
+#define INSN_MASK_CSRRS 0x707f
+#define INSN_MATCH_CSRRC 0x3073
+#define INSN_MASK_CSRRC 0x707f
+#define INSN_MATCH_CSRRWI 0x5073
+#define INSN_MASK_CSRRWI 0x707f
+#define INSN_MATCH_CSRRSI 0x6073
+#define INSN_MASK_CSRRSI 0x707f
+#define INSN_MATCH_CSRRCI 0x7073
+#define INSN_MASK_CSRRCI 0x707f
+
+#define INSN_MATCH_LB 0x3
+#define INSN_MASK_LB 0x707f
+#define INSN_MATCH_LH 0x1003
+#define INSN_MASK_LH 0x707f
+#define INSN_MATCH_LW 0x2003
+#define INSN_MASK_LW 0x707f
+#define INSN_MATCH_LD 0x3003
+#define INSN_MASK_LD 0x707f
+#define INSN_MATCH_LBU 0x4003
+#define INSN_MASK_LBU 0x707f
+#define INSN_MATCH_LHU 0x5003
+#define INSN_MASK_LHU 0x707f
+#define INSN_MATCH_LWU 0x6003
+#define INSN_MASK_LWU 0x707f
+#define INSN_MATCH_SB 0x23
+#define INSN_MASK_SB 0x707f
+#define INSN_MATCH_SH 0x1023
+#define INSN_MASK_SH 0x707f
+#define INSN_MATCH_SW 0x2023
+#define INSN_MASK_SW 0x707f
+#define INSN_MATCH_SD 0x3023
+#define INSN_MASK_SD 0x707f
+
+#define INSN_MATCH_C_LD 0x6000
+#define INSN_MASK_C_LD 0xe003
+#define INSN_MATCH_C_SD 0xe000
+#define INSN_MASK_C_SD 0xe003
+#define INSN_MATCH_C_LW 0x4000
+#define INSN_MASK_C_LW 0xe003
+#define INSN_MATCH_C_SW 0xc000
+#define INSN_MASK_C_SW 0xe003
+#define INSN_MATCH_C_LDSP 0x6002
+#define INSN_MASK_C_LDSP 0xe003
+#define INSN_MATCH_C_SDSP 0xe002
+#define INSN_MASK_C_SDSP 0xe003
+#define INSN_MATCH_C_LWSP 0x4002
+#define INSN_MASK_C_LWSP 0xe003
+#define INSN_MATCH_C_SWSP 0xc002
+#define INSN_MASK_C_SWSP 0xe003
+
+#define INSN_16BIT_MASK 0x3
+
+#define INSN_IS_16BIT(insn) (((insn) & INSN_16BIT_MASK) != INSN_16BIT_MASK)
+
+#define INSN_LEN(insn) (INSN_IS_16BIT(insn) ? 2 : 4)
+
+#ifdef CONFIG_64BIT
+#define LOG_REGBYTES 3
+#else
+#define LOG_REGBYTES 2
+#endif
+#define REGBYTES (1 << LOG_REGBYTES)
+
+#define SH_RD 7
+#define SH_RS1 15
+#define SH_RS2 20
+#define SH_RS2C 2
+#define MASK_RX 0x1f
+
+#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1))
+#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \
+ (RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 5, 1) << 6))
+#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 5, 2) << 6))
+#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \
+ (RV_X(x, 12, 1) << 5) | \
+ (RV_X(x, 2, 2) << 6))
+#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \
+ (RV_X(x, 12, 1) << 5) | \
+ (RV_X(x, 2, 3) << 6))
+#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \
+ (RV_X(x, 7, 2) << 6))
+#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \
+ (RV_X(x, 7, 3) << 6))
+#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3))
+#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3))
+#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5)
+
+#define SHIFT_RIGHT(x, y) \
+ ((y) < 0 ? ((x) << -(y)) : ((x) >> (y)))
+
+#define REG_MASK \
+ ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES))
+
+#define REG_OFFSET(insn, pos) \
+ (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK)
+
+#define REG_PTR(insn, pos, regs) \
+ ((ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)))
+
+#define GET_FUNCT3(insn) (((insn) >> 12) & 7)
+
+#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs))
+#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs))
+#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs))
+#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs))
+#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs))
+#define GET_SP(regs) (*REG_PTR(2, 0, regs))
+#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val))
+#define IMM_I(insn) ((s32)(insn) >> 20)
+#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \
+ (s32)(((insn) >> 7) & 0x1f))
+
+struct insn_func {
+ unsigned long mask;
+ unsigned long match;
+ /*
+ * Possible return values are as follows:
+ * 1) Returns < 0 for error case
+ * 2) Returns 0 for exit to user-space
+ * 3) Returns 1 to continue with next sepc
+ * 4) Returns 2 to continue with same sepc
+ * 5) Returns 3 to inject illegal instruction trap and continue
+ * 6) Returns 4 to inject virtual instruction trap and continue
+ *
+ * Use enum kvm_insn_return for return values
+ */
+ int (*func)(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn);
+};
+
+static int truly_illegal_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ ulong insn)
+{
+ struct kvm_cpu_trap utrap = { 0 };
+
+ /* Redirect trap to Guest VCPU */
+ utrap.sepc = vcpu->arch.guest_context.sepc;
+ utrap.scause = EXC_INST_ILLEGAL;
+ utrap.stval = insn;
+ utrap.htval = 0;
+ utrap.htinst = 0;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+
+ return 1;
+}
+
+static int truly_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ ulong insn)
+{
+ struct kvm_cpu_trap utrap = { 0 };
+
+ /* Redirect trap to Guest VCPU */
+ utrap.sepc = vcpu->arch.guest_context.sepc;
+ utrap.scause = EXC_VIRTUAL_INST_FAULT;
+ utrap.stval = insn;
+ utrap.htval = 0;
+ utrap.htinst = 0;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+
+ return 1;
+}
+
+/**
+ * kvm_riscv_vcpu_wfi -- Emulate wait for interrupt (WFI) behaviour
+ *
+ * @vcpu: The VCPU pointer
+ */
+void kvm_riscv_vcpu_wfi(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ kvm_vcpu_halt(vcpu);
+ kvm_vcpu_srcu_read_lock(vcpu);
+ }
+}
+
+static int wfi_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
+{
+ vcpu->stat.wfi_exit_stat++;
+ kvm_riscv_vcpu_wfi(vcpu);
+ return KVM_INSN_CONTINUE_NEXT_SEPC;
+}
+
+struct csr_func {
+ unsigned int base;
+ unsigned int count;
+ /*
+ * Possible return values are as same as "func" callback in
+ * "struct insn_func".
+ */
+ int (*func)(struct kvm_vcpu *vcpu, unsigned int csr_num,
+ unsigned long *val, unsigned long new_val,
+ unsigned long wr_mask);
+};
+
+static const struct csr_func csr_funcs[] = { };
+
+/**
+ * kvm_riscv_vcpu_csr_return -- Handle CSR read/write after user space
+ * emulation or in-kernel emulation
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the CSR data
+ *
+ * Returns > 0 upon failure and 0 upon success
+ */
+int kvm_riscv_vcpu_csr_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ ulong insn;
+
+ if (vcpu->arch.csr_decode.return_handled)
+ return 0;
+ vcpu->arch.csr_decode.return_handled = 1;
+
+ /* Update destination register for CSR reads */
+ insn = vcpu->arch.csr_decode.insn;
+ if ((insn >> SH_RD) & MASK_RX)
+ SET_RD(insn, &vcpu->arch.guest_context,
+ run->riscv_csr.ret_value);
+
+ /* Move to next instruction */
+ vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+
+ return 0;
+}
+
+static int csr_insn(struct kvm_vcpu *vcpu, struct kvm_run *run, ulong insn)
+{
+ int i, rc = KVM_INSN_ILLEGAL_TRAP;
+ unsigned int csr_num = insn >> SH_RS2;
+ unsigned int rs1_num = (insn >> SH_RS1) & MASK_RX;
+ ulong rs1_val = GET_RS1(insn, &vcpu->arch.guest_context);
+ const struct csr_func *tcfn, *cfn = NULL;
+ ulong val = 0, wr_mask = 0, new_val = 0;
+
+ /* Decode the CSR instruction */
+ switch (GET_FUNCT3(insn)) {
+ case GET_FUNCT3(INSN_MATCH_CSRRW):
+ wr_mask = -1UL;
+ new_val = rs1_val;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRS):
+ wr_mask = rs1_val;
+ new_val = -1UL;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRC):
+ wr_mask = rs1_val;
+ new_val = 0;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRWI):
+ wr_mask = -1UL;
+ new_val = rs1_num;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRSI):
+ wr_mask = rs1_num;
+ new_val = -1UL;
+ break;
+ case GET_FUNCT3(INSN_MATCH_CSRRCI):
+ wr_mask = rs1_num;
+ new_val = 0;
+ break;
+ default:
+ return rc;
+ }
+
+ /* Save instruction decode info */
+ vcpu->arch.csr_decode.insn = insn;
+ vcpu->arch.csr_decode.return_handled = 0;
+
+ /* Update CSR details in kvm_run struct */
+ run->riscv_csr.csr_num = csr_num;
+ run->riscv_csr.new_value = new_val;
+ run->riscv_csr.write_mask = wr_mask;
+ run->riscv_csr.ret_value = 0;
+
+ /* Find in-kernel CSR function */
+ for (i = 0; i < ARRAY_SIZE(csr_funcs); i++) {
+ tcfn = &csr_funcs[i];
+ if ((tcfn->base <= csr_num) &&
+ (csr_num < (tcfn->base + tcfn->count))) {
+ cfn = tcfn;
+ break;
+ }
+ }
+
+ /* First try in-kernel CSR emulation */
+ if (cfn && cfn->func) {
+ rc = cfn->func(vcpu, csr_num, &val, new_val, wr_mask);
+ if (rc > KVM_INSN_EXIT_TO_USER_SPACE) {
+ if (rc == KVM_INSN_CONTINUE_NEXT_SEPC) {
+ run->riscv_csr.ret_value = val;
+ vcpu->stat.csr_exit_kernel++;
+ kvm_riscv_vcpu_csr_return(vcpu, run);
+ rc = KVM_INSN_CONTINUE_SAME_SEPC;
+ }
+ return rc;
+ }
+ }
+
+ /* Exit to user-space for CSR emulation */
+ if (rc <= KVM_INSN_EXIT_TO_USER_SPACE) {
+ vcpu->stat.csr_exit_user++;
+ run->exit_reason = KVM_EXIT_RISCV_CSR;
+ }
+
+ return rc;
+}
+
+static const struct insn_func system_opcode_funcs[] = {
+ {
+ .mask = INSN_MASK_CSRRW,
+ .match = INSN_MATCH_CSRRW,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_CSRRS,
+ .match = INSN_MATCH_CSRRS,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_CSRRC,
+ .match = INSN_MATCH_CSRRC,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_CSRRWI,
+ .match = INSN_MATCH_CSRRWI,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_CSRRSI,
+ .match = INSN_MATCH_CSRRSI,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_CSRRCI,
+ .match = INSN_MATCH_CSRRCI,
+ .func = csr_insn,
+ },
+ {
+ .mask = INSN_MASK_WFI,
+ .match = INSN_MATCH_WFI,
+ .func = wfi_insn,
+ },
+};
+
+static int system_opcode_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ ulong insn)
+{
+ int i, rc = KVM_INSN_ILLEGAL_TRAP;
+ const struct insn_func *ifn;
+
+ for (i = 0; i < ARRAY_SIZE(system_opcode_funcs); i++) {
+ ifn = &system_opcode_funcs[i];
+ if ((insn & ifn->mask) == ifn->match) {
+ rc = ifn->func(vcpu, run, insn);
+ break;
+ }
+ }
+
+ switch (rc) {
+ case KVM_INSN_ILLEGAL_TRAP:
+ return truly_illegal_insn(vcpu, run, insn);
+ case KVM_INSN_VIRTUAL_TRAP:
+ return truly_virtual_insn(vcpu, run, insn);
+ case KVM_INSN_CONTINUE_NEXT_SEPC:
+ vcpu->arch.guest_context.sepc += INSN_LEN(insn);
+ break;
+ default:
+ break;
+ }
+
+ return (rc <= 0) ? rc : 1;
+}
+
+/**
+ * kvm_riscv_vcpu_virtual_insn -- Handle virtual instruction trap
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ * @trap: Trap details
+ *
+ * Returns > 0 to continue run-loop
+ * Returns 0 to exit run-loop and handle in user-space.
+ * Returns < 0 to report failure and exit run-loop
+ */
+int kvm_riscv_vcpu_virtual_insn(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ struct kvm_cpu_trap *trap)
+{
+ unsigned long insn = trap->stval;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct;
+
+ if (unlikely(INSN_IS_16BIT(insn))) {
+ if (insn == 0) {
+ ct = &vcpu->arch.guest_context;
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true,
+ ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ }
+ if (INSN_IS_16BIT(insn))
+ return truly_illegal_insn(vcpu, run, insn);
+ }
+
+ switch ((insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT) {
+ case INSN_OPCODE_SYSTEM:
+ return system_opcode_insn(vcpu, run, insn);
+ default:
+ return truly_illegal_insn(vcpu, run, insn);
+ }
+}
+
+/**
+ * kvm_riscv_vcpu_mmio_load -- Emulate MMIO load instruction
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ * @fault_addr: Guest physical address to load
+ * @htinst: Transformed encoding of the load instruction
+ *
+ * Returns > 0 to continue run-loop
+ * Returns 0 to exit run-loop and handle in user-space.
+ * Returns < 0 to report failure and exit run-loop
+ */
+int kvm_riscv_vcpu_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long fault_addr,
+ unsigned long htinst)
+{
+ u8 data_buf[8];
+ unsigned long insn;
+ int shift = 0, len = 0, insn_len = 0;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
+
+ /* Determine trapped instruction */
+ if (htinst & 0x1) {
+ /*
+ * Bit[0] == 1 implies trapped instruction value is
+ * transformed instruction or custom instruction.
+ */
+ insn = htinst | INSN_16BIT_MASK;
+ insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
+ } else {
+ /*
+ * Bit[0] == 0 implies trapped instruction value is
+ * zero or special value.
+ */
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ /* Redirect trap if we failed to read instruction */
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ insn_len = INSN_LEN(insn);
+ }
+
+ /* Decode length of MMIO and shift */
+ if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LB) == INSN_MATCH_LB) {
+ len = 1;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LBU) == INSN_MATCH_LBU) {
+ len = 1;
+ shift = 8 * (sizeof(ulong) - len);
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) {
+ len = 4;
+#endif
+ } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) {
+ len = 2;
+ shift = 8 * (sizeof(ulong) - len);
+ } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) {
+ len = 2;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+ insn = RVC_RS2S(insn) << SH_RD;
+ } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 8;
+ shift = 8 * (sizeof(ulong) - len);
+#endif
+ } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ insn = RVC_RS2S(insn) << SH_RD;
+ } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 4;
+ shift = 8 * (sizeof(ulong) - len);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ /* Fault address should be aligned to length of MMIO */
+ if (fault_addr & (len - 1))
+ return -EIO;
+
+ /* Save instruction decode info */
+ vcpu->arch.mmio_decode.insn = insn;
+ vcpu->arch.mmio_decode.insn_len = insn_len;
+ vcpu->arch.mmio_decode.shift = shift;
+ vcpu->arch.mmio_decode.len = len;
+ vcpu->arch.mmio_decode.return_handled = 0;
+
+ /* Update MMIO details in kvm_run struct */
+ run->mmio.is_write = false;
+ run->mmio.phys_addr = fault_addr;
+ run->mmio.len = len;
+
+ /* Try to handle MMIO access in the kernel */
+ if (!kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_addr, len, data_buf)) {
+ /* Successfully handled MMIO access in the kernel so resume */
+ memcpy(run->mmio.data, data_buf, len);
+ vcpu->stat.mmio_exit_kernel++;
+ kvm_riscv_vcpu_mmio_return(vcpu, run);
+ return 1;
+ }
+
+ /* Exit to userspace for MMIO emulation */
+ vcpu->stat.mmio_exit_user++;
+ run->exit_reason = KVM_EXIT_MMIO;
+
+ return 0;
+}
+
+/**
+ * kvm_riscv_vcpu_mmio_store -- Emulate MMIO store instruction
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ * @fault_addr: Guest physical address to store
+ * @htinst: Transformed encoding of the store instruction
+ *
+ * Returns > 0 to continue run-loop
+ * Returns 0 to exit run-loop and handle in user-space.
+ * Returns < 0 to report failure and exit run-loop
+ */
+int kvm_riscv_vcpu_mmio_store(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long fault_addr,
+ unsigned long htinst)
+{
+ u8 data8;
+ u16 data16;
+ u32 data32;
+ u64 data64;
+ ulong data;
+ unsigned long insn;
+ int len = 0, insn_len = 0;
+ struct kvm_cpu_trap utrap = { 0 };
+ struct kvm_cpu_context *ct = &vcpu->arch.guest_context;
+
+ /* Determine trapped instruction */
+ if (htinst & 0x1) {
+ /*
+ * Bit[0] == 1 implies trapped instruction value is
+ * transformed instruction or custom instruction.
+ */
+ insn = htinst | INSN_16BIT_MASK;
+ insn_len = (htinst & BIT(1)) ? INSN_LEN(insn) : 2;
+ } else {
+ /*
+ * Bit[0] == 0 implies trapped instruction value is
+ * zero or special value.
+ */
+ insn = kvm_riscv_vcpu_unpriv_read(vcpu, true, ct->sepc,
+ &utrap);
+ if (utrap.scause) {
+ /* Redirect trap if we failed to read instruction */
+ utrap.sepc = ct->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ return 1;
+ }
+ insn_len = INSN_LEN(insn);
+ }
+
+ data = GET_RS2(insn, &vcpu->arch.guest_context);
+ data8 = data16 = data32 = data64 = data;
+
+ if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) {
+ len = 4;
+ } else if ((insn & INSN_MASK_SB) == INSN_MATCH_SB) {
+ len = 1;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) {
+ len = 8;
+#endif
+ } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) {
+ len = 2;
+#ifdef CONFIG_64BIT
+ } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) {
+ len = 8;
+ data64 = GET_RS2S(insn, &vcpu->arch.guest_context);
+ } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 8;
+ data64 = GET_RS2C(insn, &vcpu->arch.guest_context);
+#endif
+ } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) {
+ len = 4;
+ data32 = GET_RS2S(insn, &vcpu->arch.guest_context);
+ } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP &&
+ ((insn >> SH_RD) & 0x1f)) {
+ len = 4;
+ data32 = GET_RS2C(insn, &vcpu->arch.guest_context);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ /* Fault address should be aligned to length of MMIO */
+ if (fault_addr & (len - 1))
+ return -EIO;
+
+ /* Save instruction decode info */
+ vcpu->arch.mmio_decode.insn = insn;
+ vcpu->arch.mmio_decode.insn_len = insn_len;
+ vcpu->arch.mmio_decode.shift = 0;
+ vcpu->arch.mmio_decode.len = len;
+ vcpu->arch.mmio_decode.return_handled = 0;
+
+ /* Copy data to kvm_run instance */
+ switch (len) {
+ case 1:
+ *((u8 *)run->mmio.data) = data8;
+ break;
+ case 2:
+ *((u16 *)run->mmio.data) = data16;
+ break;
+ case 4:
+ *((u32 *)run->mmio.data) = data32;
+ break;
+ case 8:
+ *((u64 *)run->mmio.data) = data64;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Update MMIO details in kvm_run struct */
+ run->mmio.is_write = true;
+ run->mmio.phys_addr = fault_addr;
+ run->mmio.len = len;
+
+ /* Try to handle MMIO access in the kernel */
+ if (!kvm_io_bus_write(vcpu, KVM_MMIO_BUS,
+ fault_addr, len, run->mmio.data)) {
+ /* Successfully handled MMIO access in the kernel so resume */
+ vcpu->stat.mmio_exit_kernel++;
+ kvm_riscv_vcpu_mmio_return(vcpu, run);
+ return 1;
+ }
+
+ /* Exit to userspace for MMIO emulation */
+ vcpu->stat.mmio_exit_user++;
+ run->exit_reason = KVM_EXIT_MMIO;
+
+ return 0;
+}
+
+/**
+ * kvm_riscv_vcpu_mmio_return -- Handle MMIO loads after user space emulation
+ * or in-kernel IO emulation
+ *
+ * @vcpu: The VCPU pointer
+ * @run: The VCPU run struct containing the mmio data
+ */
+int kvm_riscv_vcpu_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ u8 data8;
+ u16 data16;
+ u32 data32;
+ u64 data64;
+ ulong insn;
+ int len, shift;
+
+ if (vcpu->arch.mmio_decode.return_handled)
+ return 0;
+
+ vcpu->arch.mmio_decode.return_handled = 1;
+ insn = vcpu->arch.mmio_decode.insn;
+
+ if (run->mmio.is_write)
+ goto done;
+
+ len = vcpu->arch.mmio_decode.len;
+ shift = vcpu->arch.mmio_decode.shift;
+
+ switch (len) {
+ case 1:
+ data8 = *((u8 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data8 << shift >> shift);
+ break;
+ case 2:
+ data16 = *((u16 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data16 << shift >> shift);
+ break;
+ case 4:
+ data32 = *((u32 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data32 << shift >> shift);
+ break;
+ case 8:
+ data64 = *((u64 *)run->mmio.data);
+ SET_RD(insn, &vcpu->arch.guest_context,
+ (ulong)data64 << shift >> shift);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+done:
+ /* Move to next instruction */
+ vcpu->arch.guest_context.sepc += vcpu->arch.mmio_decode.insn_len;
+
+ return 0;
+}
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index 3b0e703d22cf..f96991d230bf 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -9,15 +9,50 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
-#include <asm/csr.h>
#include <asm/sbi.h>
-#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
-#define SBI_VERSION_MAJOR 0
-#define SBI_VERSION_MINOR 1
+static int kvm_linux_err_map_sbi(int err)
+{
+ switch (err) {
+ case 0:
+ return SBI_SUCCESS;
+ case -EPERM:
+ return SBI_ERR_DENIED;
+ case -EINVAL:
+ return SBI_ERR_INVALID_PARAM;
+ case -EFAULT:
+ return SBI_ERR_INVALID_ADDRESS;
+ case -EOPNOTSUPP:
+ return SBI_ERR_NOT_SUPPORTED;
+ case -EALREADY:
+ return SBI_ERR_ALREADY_AVAILABLE;
+ default:
+ return SBI_ERR_FAILURE;
+ };
+}
-static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
- struct kvm_run *run)
+#ifndef CONFIG_RISCV_SBI_V01
+static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = -1UL,
+ .extid_end = -1UL,
+ .handler = NULL,
+};
+#endif
+
+static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
+ &vcpu_sbi_ext_v01,
+ &vcpu_sbi_ext_base,
+ &vcpu_sbi_ext_time,
+ &vcpu_sbi_ext_ipi,
+ &vcpu_sbi_ext_rfence,
+ &vcpu_sbi_ext_srst,
+ &vcpu_sbi_ext_hsm,
+ &vcpu_sbi_ext_experimental,
+ &vcpu_sbi_ext_vendor,
+};
+
+void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -36,6 +71,24 @@ static void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu,
run->riscv_sbi.ret[1] = cp->a1;
}
+void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ u32 type, u64 reason)
+{
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm)
+ tmp->arch.power_off = true;
+ kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+
+ memset(&run->system_event, 0, sizeof(run->system_event));
+ run->system_event.type = type;
+ run->system_event.ndata = 1;
+ run->system_event.data[0] = reason;
+ run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+}
+
int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
@@ -55,131 +108,73 @@ int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-#ifdef CONFIG_RISCV_SBI_V01
-
-static void kvm_sbi_system_shutdown(struct kvm_vcpu *vcpu,
- struct kvm_run *run, u32 type)
+const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid)
{
- int i;
- struct kvm_vcpu *tmp;
+ int i = 0;
- kvm_for_each_vcpu(i, tmp, vcpu->kvm)
- tmp->arch.power_off = true;
- kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP);
+ for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
+ if (sbi_ext[i]->extid_start <= extid &&
+ sbi_ext[i]->extid_end >= extid)
+ return sbi_ext[i];
+ }
- memset(&run->system_event, 0, sizeof(run->system_event));
- run->system_event.type = type;
- run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+ return NULL;
}
int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- ulong hmask;
- int i, ret = 1;
- u64 next_cycle;
- struct kvm_vcpu *rvcpu;
+ int ret = 1;
bool next_sepc = true;
- struct cpumask cm, hm;
- struct kvm *kvm = vcpu->kvm;
- struct kvm_cpu_trap utrap = { 0 };
+ bool userspace_exit = false;
struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ const struct kvm_vcpu_sbi_extension *sbi_ext;
+ struct kvm_cpu_trap utrap = { 0 };
+ unsigned long out_val = 0;
+ bool ext_is_v01 = false;
- if (!cp)
- return -EINVAL;
-
- switch (cp->a7) {
- case SBI_EXT_0_1_CONSOLE_GETCHAR:
- case SBI_EXT_0_1_CONSOLE_PUTCHAR:
- /*
- * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
- * handled in kernel so we forward these to user-space
- */
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_SET_TIMER:
-#if __riscv_xlen == 32
- next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
-#else
- next_cycle = (u64)cp->a0;
+ sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7);
+ if (sbi_ext && sbi_ext->handler) {
+#ifdef CONFIG_RISCV_SBI_V01
+ if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
+ cp->a7 <= SBI_EXT_0_1_SHUTDOWN)
+ ext_is_v01 = true;
#endif
- kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
- break;
- case SBI_EXT_0_1_CLEAR_IPI:
- kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
- break;
- case SBI_EXT_0_1_SEND_IPI:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
- }
- break;
- case SBI_EXT_0_1_SHUTDOWN:
- kvm_sbi_system_shutdown(vcpu, run, KVM_SYSTEM_EVENT_SHUTDOWN);
- next_sepc = false;
- ret = 0;
- break;
- case SBI_EXT_0_1_REMOTE_FENCE_I:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
- case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
- if (cp->a0)
- hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
- &utrap);
- else
- hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
- if (utrap.scause) {
- utrap.sepc = cp->sepc;
- kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
- next_sepc = false;
- break;
- }
- cpumask_clear(&cm);
- for_each_set_bit(i, &hmask, BITS_PER_LONG) {
- rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
- if (rvcpu->cpu < 0)
- continue;
- cpumask_set_cpu(rvcpu->cpu, &cm);
- }
- riscv_cpuid_to_hartid_mask(&cm, &hm);
- if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
- sbi_remote_fence_i(cpumask_bits(&hm));
- else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA)
- sbi_remote_hfence_vvma(cpumask_bits(&hm),
- cp->a1, cp->a2);
- else
- sbi_remote_hfence_vvma_asid(cpumask_bits(&hm),
- cp->a1, cp->a2, cp->a3);
- break;
- default:
+ ret = sbi_ext->handler(vcpu, run, &out_val, &utrap, &userspace_exit);
+ } else {
/* Return error for unsupported SBI calls */
cp->a0 = SBI_ERR_NOT_SUPPORTED;
- break;
+ goto ecall_done;
+ }
+
+ /* Handle special error cases i.e trap, exit or userspace forward */
+ if (utrap.scause) {
+ /* No need to increment sepc or exit ioctl loop */
+ ret = 1;
+ utrap.sepc = cp->sepc;
+ kvm_riscv_vcpu_trap_redirect(vcpu, &utrap);
+ next_sepc = false;
+ goto ecall_done;
}
+ /* Exit ioctl loop or Propagate the error code the guest */
+ if (userspace_exit) {
+ next_sepc = false;
+ ret = 0;
+ } else {
+ /**
+ * SBI extension handler always returns an Linux error code. Convert
+ * it to the SBI specific error code that can be propagated the SBI
+ * caller.
+ */
+ ret = kvm_linux_err_map_sbi(ret);
+ cp->a0 = ret;
+ ret = 1;
+ }
+ecall_done:
if (next_sepc)
cp->sepc += 4;
+ if (!ext_is_v01)
+ cp->a1 = out_val;
return ret;
}
-
-#else
-
-int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
- kvm_riscv_vcpu_sbi_forward(vcpu, run);
- return 0;
-}
-
-#endif
diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c
new file mode 100644
index 000000000000..48f431091cdb
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_base.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/version.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *trap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct sbiret ecall_ret;
+
+ switch (cp->a6) {
+ case SBI_EXT_BASE_GET_SPEC_VERSION:
+ *out_val = (KVM_SBI_VERSION_MAJOR <<
+ SBI_SPEC_VERSION_MAJOR_SHIFT) |
+ KVM_SBI_VERSION_MINOR;
+ break;
+ case SBI_EXT_BASE_GET_IMP_ID:
+ *out_val = KVM_SBI_IMPID;
+ break;
+ case SBI_EXT_BASE_GET_IMP_VERSION:
+ *out_val = LINUX_VERSION_CODE;
+ break;
+ case SBI_EXT_BASE_PROBE_EXT:
+ if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START &&
+ cp->a0 <= SBI_EXT_EXPERIMENTAL_END) ||
+ (cp->a0 >= SBI_EXT_VENDOR_START &&
+ cp->a0 <= SBI_EXT_VENDOR_END)) {
+ /*
+ * For experimental/vendor extensions
+ * forward it to the userspace
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ } else
+ *out_val = kvm_vcpu_sbi_find_ext(cp->a0) ? 1 : 0;
+ break;
+ case SBI_EXT_BASE_GET_MVENDORID:
+ case SBI_EXT_BASE_GET_MARCHID:
+ case SBI_EXT_BASE_GET_MIMPID:
+ ecall_ret = sbi_ecall(SBI_EXT_BASE, cp->a6, 0, 0, 0, 0, 0, 0);
+ if (!ecall_ret.error)
+ *out_val = ecall_ret.value;
+ /*TODO: We are unnecessarily converting the error twice */
+ ret = sbi_err_map_linux_errno(ecall_ret.error);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base = {
+ .extid_start = SBI_EXT_BASE,
+ .extid_end = SBI_EXT_BASE,
+ .handler = kvm_sbi_ext_base_handler,
+};
+
+static int kvm_sbi_ext_forward_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ /*
+ * Both SBI experimental and vendor extensions are
+ * unconditionally forwarded to userspace.
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ return 0;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental = {
+ .extid_start = SBI_EXT_EXPERIMENTAL_START,
+ .extid_end = SBI_EXT_EXPERIMENTAL_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor = {
+ .extid_start = SBI_EXT_VENDOR_START,
+ .extid_end = SBI_EXT_VENDOR_END,
+ .handler = kvm_sbi_ext_forward_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c
new file mode 100644
index 000000000000..239dec0a628a
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_hsm.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *reset_cntx;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm_vcpu *target_vcpu;
+ unsigned long target_vcpuid = cp->a0;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return -EALREADY;
+
+ reset_cntx = &target_vcpu->arch.guest_reset_context;
+ /* start address */
+ reset_cntx->sepc = cp->a1;
+ /* target vcpu id to start */
+ reset_cntx->a0 = target_vcpuid;
+ /* private data passed from kernel */
+ reset_cntx->a1 = cp->a2;
+ kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu);
+
+ kvm_riscv_vcpu_power_on(target_vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.power_off)
+ return -EINVAL;
+
+ kvm_riscv_vcpu_power_off(vcpu);
+
+ return 0;
+}
+
+static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long target_vcpuid = cp->a0;
+ struct kvm_vcpu *target_vcpu;
+
+ target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
+ if (!target_vcpu)
+ return -EINVAL;
+ if (!target_vcpu->arch.power_off)
+ return SBI_HSM_STATE_STARTED;
+ else if (vcpu->stat.generic.blocking)
+ return SBI_HSM_STATE_SUSPENDED;
+ else
+ return SBI_HSM_STATE_STOPPED;
+}
+
+static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long funcid = cp->a6;
+
+ switch (funcid) {
+ case SBI_EXT_HSM_HART_START:
+ mutex_lock(&kvm->lock);
+ ret = kvm_sbi_hsm_vcpu_start(vcpu);
+ mutex_unlock(&kvm->lock);
+ break;
+ case SBI_EXT_HSM_HART_STOP:
+ ret = kvm_sbi_hsm_vcpu_stop(vcpu);
+ break;
+ case SBI_EXT_HSM_HART_STATUS:
+ ret = kvm_sbi_hsm_vcpu_get_status(vcpu);
+ if (ret >= 0) {
+ *out_val = ret;
+ ret = 0;
+ }
+ break;
+ case SBI_EXT_HSM_HART_SUSPEND:
+ switch (cp->a0) {
+ case SBI_HSM_SUSPEND_RET_DEFAULT:
+ kvm_riscv_vcpu_wfi(vcpu);
+ break;
+ case SBI_HSM_SUSPEND_NON_RET_DEFAULT:
+ ret = -EOPNOTSUPP;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm = {
+ .extid_start = SBI_EXT_HSM,
+ .extid_end = SBI_EXT_HSM,
+ .handler = kvm_sbi_ext_hsm_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
new file mode 100644
index 000000000000..4c034d8a606a
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ u64 next_cycle;
+
+ if (cp->a6 != SBI_EXT_TIME_SET_TIMER)
+ return -EINVAL;
+
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time = {
+ .extid_start = SBI_EXT_TIME,
+ .extid_end = SBI_EXT_TIME,
+ .handler = kvm_sbi_ext_time_handler,
+};
+
+static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ unsigned long i;
+ struct kvm_vcpu *tmp;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+
+ if (cp->a6 != SBI_EXT_IPI_SEND_IPI)
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+ if (hbase != -1UL) {
+ if (tmp->vcpu_id < hbase)
+ continue;
+ if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+ continue;
+ }
+ ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi = {
+ .extid_start = SBI_EXT_IPI,
+ .extid_end = SBI_EXT_IPI,
+ .handler = kvm_sbi_ext_ipi_handler,
+};
+
+static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ int ret = 0;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long hmask = cp->a0;
+ unsigned long hbase = cp->a1;
+ unsigned long funcid = cp->a6;
+
+ switch (funcid) {
+ case SBI_EXT_RFENCE_REMOTE_FENCE_I:
+ kvm_riscv_fence_i(vcpu->kvm, hbase, hmask);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
+ if (cp->a2 == 0 && cp->a3 == 0)
+ kvm_riscv_hfence_vvma_all(vcpu->kvm, hbase, hmask);
+ else
+ kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
+ cp->a2, cp->a3, PAGE_SHIFT);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
+ if (cp->a2 == 0 && cp->a3 == 0)
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
+ hbase, hmask, cp->a4);
+ else
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
+ hbase, hmask,
+ cp->a2, cp->a3,
+ PAGE_SHIFT, cp->a4);
+ break;
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA:
+ case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID:
+ /*
+ * Until nested virtualization is implemented, the
+ * SBI HFENCE calls should be treated as NOPs
+ */
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence = {
+ .extid_start = SBI_EXT_RFENCE,
+ .extid_end = SBI_EXT_RFENCE,
+ .handler = kvm_sbi_ext_rfence_handler,
+};
+
+static int kvm_sbi_ext_srst_handler(struct kvm_vcpu *vcpu,
+ struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap, bool *exit)
+{
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+ unsigned long funcid = cp->a6;
+ u32 reason = cp->a1;
+ u32 type = cp->a0;
+ int ret = 0;
+
+ switch (funcid) {
+ case SBI_EXT_SRST_RESET:
+ switch (type) {
+ case SBI_SRST_RESET_TYPE_SHUTDOWN:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_SHUTDOWN,
+ reason);
+ *exit = true;
+ break;
+ case SBI_SRST_RESET_TYPE_COLD_REBOOT:
+ case SBI_SRST_RESET_TYPE_WARM_REBOOT:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_RESET,
+ reason);
+ *exit = true;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst = {
+ .extid_start = SBI_EXT_SRST,
+ .extid_end = SBI_EXT_SRST,
+ .handler = kvm_sbi_ext_srst_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c
new file mode 100644
index 000000000000..8a91a14e7139
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_v01.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Atish Patra <atish.patra@wdc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ unsigned long *out_val,
+ struct kvm_cpu_trap *utrap,
+ bool *exit)
+{
+ ulong hmask;
+ int i, ret = 0;
+ u64 next_cycle;
+ struct kvm_vcpu *rvcpu;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+
+ switch (cp->a7) {
+ case SBI_EXT_0_1_CONSOLE_GETCHAR:
+ case SBI_EXT_0_1_CONSOLE_PUTCHAR:
+ /*
+ * The CONSOLE_GETCHAR/CONSOLE_PUTCHAR SBI calls cannot be
+ * handled in kernel so we forward these to user-space
+ */
+ kvm_riscv_vcpu_sbi_forward(vcpu, run);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_SET_TIMER:
+#if __riscv_xlen == 32
+ next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
+#else
+ next_cycle = (u64)cp->a0;
+#endif
+ ret = kvm_riscv_vcpu_timer_next_event(vcpu, next_cycle);
+ break;
+ case SBI_EXT_0_1_CLEAR_IPI:
+ ret = kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_VS_SOFT);
+ break;
+ case SBI_EXT_0_1_SEND_IPI:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ for_each_set_bit(i, &hmask, BITS_PER_LONG) {
+ rvcpu = kvm_get_vcpu_by_id(vcpu->kvm, i);
+ ret = kvm_riscv_vcpu_set_interrupt(rvcpu, IRQ_VS_SOFT);
+ if (ret < 0)
+ break;
+ }
+ break;
+ case SBI_EXT_0_1_SHUTDOWN:
+ kvm_riscv_vcpu_sbi_system_reset(vcpu, run,
+ KVM_SYSTEM_EVENT_SHUTDOWN, 0);
+ *exit = true;
+ break;
+ case SBI_EXT_0_1_REMOTE_FENCE_I:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA:
+ case SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID:
+ if (cp->a0)
+ hmask = kvm_riscv_vcpu_unpriv_read(vcpu, false, cp->a0,
+ utrap);
+ else
+ hmask = (1UL << atomic_read(&kvm->online_vcpus)) - 1;
+ if (utrap->scause)
+ break;
+
+ if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I)
+ kvm_riscv_fence_i(vcpu->kvm, 0, hmask);
+ else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) {
+ if (cp->a1 == 0 && cp->a2 == 0)
+ kvm_riscv_hfence_vvma_all(vcpu->kvm,
+ 0, hmask);
+ else
+ kvm_riscv_hfence_vvma_gva(vcpu->kvm,
+ 0, hmask,
+ cp->a1, cp->a2,
+ PAGE_SHIFT);
+ } else {
+ if (cp->a1 == 0 && cp->a2 == 0)
+ kvm_riscv_hfence_vvma_asid_all(vcpu->kvm,
+ 0, hmask,
+ cp->a3);
+ else
+ kvm_riscv_hfence_vvma_asid_gva(vcpu->kvm,
+ 0, hmask,
+ cp->a1, cp->a2,
+ PAGE_SHIFT,
+ cp->a3);
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
+ .extid_start = SBI_EXT_0_1_SET_TIMER,
+ .extid_end = SBI_EXT_0_1_SHUTDOWN,
+ .handler = kvm_sbi_ext_v01_handler,
+};
diff --git a/arch/riscv/kvm/vcpu_switch.S b/arch/riscv/kvm/vcpu_switch.S
index 029a28a195c6..d74df8eb4d71 100644
--- a/arch/riscv/kvm/vcpu_switch.S
+++ b/arch/riscv/kvm/vcpu_switch.S
@@ -41,33 +41,37 @@ ENTRY(__kvm_riscv_switch_to)
REG_S s10, (KVM_ARCH_HOST_S10)(a0)
REG_S s11, (KVM_ARCH_HOST_S11)(a0)
- /* Save Host and Restore Guest SSTATUS */
+ /* Load Guest CSR values */
REG_L t0, (KVM_ARCH_GUEST_SSTATUS)(a0)
+ REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
+ REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+ la t4, __kvm_switch_return
+ REG_L t5, (KVM_ARCH_GUEST_SEPC)(a0)
+
+ /* Save Host and Restore Guest SSTATUS */
csrrw t0, CSR_SSTATUS, t0
- REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
/* Save Host and Restore Guest HSTATUS */
- REG_L t1, (KVM_ARCH_GUEST_HSTATUS)(a0)
csrrw t1, CSR_HSTATUS, t1
- REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
/* Save Host and Restore Guest SCOUNTEREN */
- REG_L t2, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
csrrw t2, CSR_SCOUNTEREN, t2
- REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
-
- /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
- csrrw t3, CSR_SSCRATCH, a0
- REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
/* Save Host STVEC and change it to return path */
- la t4, __kvm_switch_return
csrrw t4, CSR_STVEC, t4
- REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
+
+ /* Save Host SSCRATCH and change it to struct kvm_vcpu_arch pointer */
+ csrrw t3, CSR_SSCRATCH, a0
/* Restore Guest SEPC */
- REG_L t0, (KVM_ARCH_GUEST_SEPC)(a0)
- csrw CSR_SEPC, t0
+ csrw CSR_SEPC, t5
+
+ /* Store Host CSR values */
+ REG_S t0, (KVM_ARCH_HOST_SSTATUS)(a0)
+ REG_S t1, (KVM_ARCH_HOST_HSTATUS)(a0)
+ REG_S t2, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ REG_S t3, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ REG_S t4, (KVM_ARCH_HOST_STVEC)(a0)
/* Restore Guest GPRs (except A0) */
REG_L ra, (KVM_ARCH_GUEST_RA)(a0)
@@ -145,32 +149,36 @@ __kvm_switch_return:
REG_S t5, (KVM_ARCH_GUEST_T5)(a0)
REG_S t6, (KVM_ARCH_GUEST_T6)(a0)
+ /* Load Host CSR values */
+ REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
+ REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
+ REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
+ REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
+ REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
+
/* Save Guest SEPC */
csrr t0, CSR_SEPC
- REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
-
- /* Restore Host STVEC */
- REG_L t1, (KVM_ARCH_HOST_STVEC)(a0)
- csrw CSR_STVEC, t1
/* Save Guest A0 and Restore Host SSCRATCH */
- REG_L t2, (KVM_ARCH_HOST_SSCRATCH)(a0)
csrrw t2, CSR_SSCRATCH, t2
- REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
+
+ /* Restore Host STVEC */
+ csrw CSR_STVEC, t1
/* Save Guest and Restore Host SCOUNTEREN */
- REG_L t3, (KVM_ARCH_HOST_SCOUNTEREN)(a0)
csrrw t3, CSR_SCOUNTEREN, t3
- REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
/* Save Guest and Restore Host HSTATUS */
- REG_L t4, (KVM_ARCH_HOST_HSTATUS)(a0)
csrrw t4, CSR_HSTATUS, t4
- REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
/* Save Guest and Restore Host SSTATUS */
- REG_L t5, (KVM_ARCH_HOST_SSTATUS)(a0)
csrrw t5, CSR_SSTATUS, t5
+
+ /* Store Guest CSR values */
+ REG_S t0, (KVM_ARCH_GUEST_SEPC)(a0)
+ REG_S t2, (KVM_ARCH_GUEST_A0)(a0)
+ REG_S t3, (KVM_ARCH_GUEST_SCOUNTEREN)(a0)
+ REG_S t4, (KVM_ARCH_GUEST_HSTATUS)(a0)
REG_S t5, (KVM_ARCH_GUEST_SSTATUS)(a0)
/* Restore Host GPRs (except A0 and T0-T6) */
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index 5c4c37ff2d48..ad34519c8a13 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -69,7 +69,18 @@ static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
return 0;
}
-int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
+static int kvm_riscv_vcpu_update_vstimecmp(struct kvm_vcpu *vcpu, u64 ncycles)
+{
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF);
+ csr_write(CSR_VSTIMECMPH, ncycles >> 32);
+#else
+ csr_write(CSR_VSTIMECMP, ncycles);
+#endif
+ return 0;
+}
+
+static int kvm_riscv_vcpu_update_hrtimer(struct kvm_vcpu *vcpu, u64 ncycles)
{
struct kvm_vcpu_timer *t = &vcpu->arch.timer;
struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
@@ -88,6 +99,65 @@ int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
return 0;
}
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ return t->timer_next_event(vcpu, ncycles);
+}
+
+static enum hrtimer_restart kvm_riscv_vcpu_vstimer_expired(struct hrtimer *h)
+{
+ u64 delta_ns;
+ struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt);
+ struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer);
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+ if (kvm_riscv_current_cycles(gt) < t->next_cycles) {
+ delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
+ hrtimer_forward_now(&t->hrt, ktime_set(0, delta_ns));
+ return HRTIMER_RESTART;
+ }
+
+ t->next_set = false;
+ kvm_vcpu_kick(vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
+bool kvm_riscv_vcpu_timer_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+ if (!kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t) ||
+ kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER))
+ return true;
+ else
+ return false;
+}
+
+static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 delta_ns;
+
+ if (!t->init_done)
+ return;
+
+ delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
+ if (delta_ns) {
+ hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
+ t->next_set = true;
+ }
+}
+
+static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu)
+{
+ kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
+}
+
int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
@@ -180,10 +250,20 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
return -EINVAL;
hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
t->init_done = true;
t->next_set = false;
+ /* Enable sstc for every vcpu if available in hardware */
+ if (riscv_isa_extension_available(NULL, SSTC)) {
+ t->sstc_enabled = true;
+ t->hrt.function = kvm_riscv_vcpu_vstimer_expired;
+ t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp;
+ } else {
+ t->sstc_enabled = false;
+ t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
+ t->timer_next_event = kvm_riscv_vcpu_update_hrtimer;
+ }
+
return 0;
}
@@ -199,27 +279,86 @@ int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu)
int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu)
{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ t->next_cycles = -1ULL;
return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
}
-void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
+static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu)
{
struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
-#ifdef CONFIG_64BIT
- csr_write(CSR_HTIMEDELTA, gt->time_delta);
-#else
+#if defined(CONFIG_32BIT)
csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta));
csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32));
+#else
+ csr_write(CSR_HTIMEDELTA, gt->time_delta);
+#endif
+}
+
+void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ kvm_riscv_vcpu_update_timedelta(vcpu);
+
+ if (!t->sstc_enabled)
+ return;
+
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
+ csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
+#else
+ csr_write(CSR_VSTIMECMP, t->next_cycles);
+#endif
+
+ /* timer should be enabled for the remaining operations */
+ if (unlikely(!t->init_done))
+ return;
+
+ kvm_riscv_vcpu_timer_unblocking(vcpu);
+}
+
+void kvm_riscv_vcpu_timer_sync(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ if (!t->sstc_enabled)
+ return;
+
+#if defined(CONFIG_32BIT)
+ t->next_cycles = csr_read(CSR_VSTIMECMP);
+ t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32;
+#else
+ t->next_cycles = csr_read(CSR_VSTIMECMP);
#endif
}
-int kvm_riscv_guest_timer_init(struct kvm *kvm)
+void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ if (!t->sstc_enabled)
+ return;
+
+ /*
+ * The vstimecmp CSRs are saved by kvm_riscv_vcpu_timer_sync()
+ * upon every VM exit so no need to save here.
+ */
+
+ /* timer should be enabled for the remaining operations */
+ if (unlikely(!t->init_done))
+ return;
+
+ if (kvm_vcpu_is_blocking(vcpu))
+ kvm_riscv_vcpu_timer_blocking(vcpu);
+}
+
+void kvm_riscv_guest_timer_init(struct kvm *kvm)
{
struct kvm_guest_timer *gt = &kvm->arch.timer;
riscv_cs_get_mult_shift(&gt->nsec_mult, &gt->nsec_shift);
gt->time_delta = -get_cycles64();
-
- return 0;
}
diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c
index fb18af34a4b5..65a964d7e70d 100644
--- a/arch/riscv/kvm/vm.c
+++ b/arch/riscv/kvm/vm.c
@@ -31,30 +31,24 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int r;
- r = kvm_riscv_stage2_alloc_pgd(kvm);
+ r = kvm_riscv_gstage_alloc_pgd(kvm);
if (r)
return r;
- r = kvm_riscv_stage2_vmid_init(kvm);
+ r = kvm_riscv_gstage_vmid_init(kvm);
if (r) {
- kvm_riscv_stage2_free_pgd(kvm);
+ kvm_riscv_gstage_free_pgd(kvm);
return r;
}
- return kvm_riscv_guest_timer_init(kvm);
+ kvm_riscv_guest_timer_init(kvm);
+
+ return 0;
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- int i;
-
- for (i = 0; i < KVM_MAX_VCPUS; ++i) {
- if (kvm->vcpus[i]) {
- kvm_vcpu_destroy(kvm->vcpus[i]);
- kvm->vcpus[i] = NULL;
- }
- }
- atomic_set(&kvm->online_vcpus, 0);
+ kvm_destroy_vcpus(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -82,6 +76,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
break;
+ case KVM_CAP_VM_GPA_BITS:
+ r = kvm_riscv_gstage_gpa_bits();
+ break;
default:
r = 0;
break;
diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c
index 2c6253b293bc..6cd93995fb65 100644
--- a/arch/riscv/kvm/vmid.c
+++ b/arch/riscv/kvm/vmid.c
@@ -11,16 +11,16 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/smp.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
-#include <asm/sbi.h>
static unsigned long vmid_version = 1;
static unsigned long vmid_next;
static unsigned long vmid_bits;
static DEFINE_SPINLOCK(vmid_lock);
-void kvm_riscv_stage2_vmid_detect(void)
+void kvm_riscv_gstage_vmid_detect(void)
{
unsigned long old;
@@ -33,19 +33,19 @@ void kvm_riscv_stage2_vmid_detect(void)
csr_write(CSR_HGATP, old);
/* We polluted local TLB so flush all guest TLB */
- __kvm_riscv_hfence_gvma_all();
+ kvm_riscv_local_hfence_gvma_all();
/* We don't use VMID bits if they are not sufficient */
if ((1UL << vmid_bits) < num_possible_cpus())
vmid_bits = 0;
}
-unsigned long kvm_riscv_stage2_vmid_bits(void)
+unsigned long kvm_riscv_gstage_vmid_bits(void)
{
return vmid_bits;
}
-int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
+int kvm_riscv_gstage_vmid_init(struct kvm *kvm)
{
/* Mark the initial VMID and VMID version invalid */
kvm->arch.vmid.vmid_version = 0;
@@ -54,7 +54,7 @@ int kvm_riscv_stage2_vmid_init(struct kvm *kvm)
return 0;
}
-bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
+bool kvm_riscv_gstage_vmid_ver_changed(struct kvm_vmid *vmid)
{
if (!vmid_bits)
return false;
@@ -63,14 +63,18 @@ bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid)
READ_ONCE(vmid_version));
}
-void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
+static void __local_hfence_gvma_all(void *info)
{
- int i;
+ kvm_riscv_local_hfence_gvma_all();
+}
+
+void kvm_riscv_gstage_vmid_update(struct kvm_vcpu *vcpu)
+{
+ unsigned long i;
struct kvm_vcpu *v;
- struct cpumask hmask;
struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid;
- if (!kvm_riscv_stage2_vmid_ver_changed(vmid))
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid))
return;
spin_lock(&vmid_lock);
@@ -79,7 +83,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* We need to re-check the vmid_version here to ensure that if
* another vcpu already allocated a valid vmid for this vm.
*/
- if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) {
+ if (!kvm_riscv_gstage_vmid_ver_changed(vmid)) {
spin_unlock(&vmid_lock);
return;
}
@@ -93,17 +97,17 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
* We ran out of VMIDs so we increment vmid_version and
* start assigning VMIDs from 1.
*
- * This also means existing VMIDs assignement to all Guest
+ * This also means existing VMIDs assignment to all Guest
* instances is invalid and we have force VMID re-assignement
* for all Guest instances. The Guest instances that were not
* running will automatically pick-up new VMIDs because will
- * call kvm_riscv_stage2_vmid_update() whenever they enter
+ * call kvm_riscv_gstage_vmid_update() whenever they enter
* in-kernel run loop. For Guest instances that are already
* running, we force VM exits on all host CPUs using IPI and
* flush all Guest TLBs.
*/
- riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
- sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0);
+ on_each_cpu_mask(cpu_online_mask, __local_hfence_gvma_all,
+ NULL, 1);
}
vmid->vmid = vmid_next;
@@ -114,7 +118,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu)
spin_unlock(&vmid_lock);
- /* Request stage2 page table update for all VCPUs */
+ /* Request G-stage page table update for all VCPUs */
kvm_for_each_vcpu(i, v, vcpu->kvm)
kvm_make_request(KVM_REQ_UPDATE_HGATP, v);
}