aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/loongarch/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/loongarch/kvm')
-rw-r--r--arch/loongarch/kvm/Kconfig44
-rw-r--r--arch/loongarch/kvm/Makefile24
-rw-r--r--arch/loongarch/kvm/exit.c971
-rw-r--r--arch/loongarch/kvm/intc/eiointc.c1027
-rw-r--r--arch/loongarch/kvm/intc/ipi.c479
-rw-r--r--arch/loongarch/kvm/intc/pch_pic.c519
-rw-r--r--arch/loongarch/kvm/interrupt.c183
-rw-r--r--arch/loongarch/kvm/irqfd.c89
-rw-r--r--arch/loongarch/kvm/main.c463
-rw-r--r--arch/loongarch/kvm/mmu.c947
-rw-r--r--arch/loongarch/kvm/switch.S278
-rw-r--r--arch/loongarch/kvm/timer.c191
-rw-r--r--arch/loongarch/kvm/tlb.c29
-rw-r--r--arch/loongarch/kvm/trace.h174
-rw-r--r--arch/loongarch/kvm/vcpu.c1815
-rw-r--r--arch/loongarch/kvm/vm.c197
16 files changed, 7430 insertions, 0 deletions
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
new file mode 100644
index 000000000000..40eea6da7c25
--- /dev/null
+++ b/arch/loongarch/kvm/Kconfig
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# KVM configuration
+#
+
+source "virt/kvm/Kconfig"
+
+menuconfig VIRTUALIZATION
+ bool "Virtualization"
+ help
+ Say Y here to get to see options for using your Linux host to run
+ other operating systems inside virtual machines (guests).
+ This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and
+ disabled.
+
+if VIRTUALIZATION
+
+config KVM
+ tristate "Kernel-based Virtual Machine (KVM) support"
+ depends on AS_HAS_LVZ_EXTENSION
+ select HAVE_KVM_DIRTY_RING_ACQ_REL
+ select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_MSI
+ select HAVE_KVM_READONLY_MEM
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select KVM_COMMON
+ select KVM_GENERIC_DIRTYLOG_READ_PROTECT
+ select KVM_GENERIC_HARDWARE_ENABLING
+ select KVM_GENERIC_MMU_NOTIFIER
+ select KVM_MMIO
+ select KVM_XFER_TO_GUEST_WORK
+ select SCHED_INFO
+ select GUEST_PERF_EVENTS if PERF_EVENTS
+ help
+ Support hosting virtualized guest machines using
+ hardware virtualization extensions. You will need
+ a processor equipped with virtualization extensions.
+
+ If unsure, say N.
+
+endif # VIRTUALIZATION
diff --git a/arch/loongarch/kvm/Makefile b/arch/loongarch/kvm/Makefile
new file mode 100644
index 000000000000..cb41d9265662
--- /dev/null
+++ b/arch/loongarch/kvm/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for LoongArch KVM support
+#
+
+include $(srctree)/virt/kvm/Makefile.kvm
+
+obj-$(CONFIG_KVM) += kvm.o
+
+kvm-y += exit.o
+kvm-y += interrupt.o
+kvm-y += main.o
+kvm-y += mmu.o
+kvm-y += switch.o
+kvm-y += timer.o
+kvm-y += tlb.o
+kvm-y += vcpu.o
+kvm-y += vm.o
+kvm-y += intc/ipi.o
+kvm-y += intc/eiointc.o
+kvm-y += intc/pch_pic.o
+kvm-y += irqfd.o
+
+CFLAGS_exit.o += $(call cc-disable-warning, override-init)
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
new file mode 100644
index 000000000000..fa52251b3bf1
--- /dev/null
+++ b/arch/loongarch/kvm/exit.c
@@ -0,0 +1,971 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/vmalloc.h>
+#include <trace/events/kvm.h>
+#include <asm/fpu.h>
+#include <asm/inst.h>
+#include <asm/loongarch.h>
+#include <asm/mmzone.h>
+#include <asm/numa.h>
+#include <asm/time.h>
+#include <asm/tlb.h>
+#include <asm/kvm_csr.h>
+#include <asm/kvm_vcpu.h>
+#include "trace.h"
+
+static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ int rd, rj;
+ unsigned int index, ret;
+
+ if (inst.reg2_format.opcode != cpucfg_op)
+ return EMULATE_FAIL;
+
+ rd = inst.reg2_format.rd;
+ rj = inst.reg2_format.rj;
+ ++vcpu->stat.cpucfg_exits;
+ index = vcpu->arch.gprs[rj];
+
+ /*
+ * By LoongArch Reference Manual 2.2.10.5
+ * Return value is 0 for undefined CPUCFG index
+ *
+ * Disable preemption since hw gcsr is accessed
+ */
+ preempt_disable();
+ switch (index) {
+ case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+ vcpu->arch.gprs[rd] = vcpu->arch.cpucfg[index];
+ break;
+ case CPUCFG_KVM_SIG:
+ /* CPUCFG emulation between 0x40000000 -- 0x400000ff */
+ vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
+ break;
+ case CPUCFG_KVM_FEATURE:
+ ret = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK;
+ vcpu->arch.gprs[rd] = ret;
+ break;
+ default:
+ vcpu->arch.gprs[rd] = 0;
+ break;
+ }
+ preempt_enable();
+
+ return EMULATE_DONE;
+}
+
+static unsigned long kvm_emu_read_csr(struct kvm_vcpu *vcpu, int csrid)
+{
+ unsigned long val = 0;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ /*
+ * From LoongArch Reference Manual Volume 1 Chapter 4.2.1
+ * For undefined CSR id, return value is 0
+ */
+ if (get_gcsr_flag(csrid) & SW_GCSR)
+ val = kvm_read_sw_gcsr(csr, csrid);
+ else
+ pr_warn_once("Unsupported csrrd 0x%x with pc %lx\n", csrid, vcpu->arch.pc);
+
+ return val;
+}
+
+static unsigned long kvm_emu_write_csr(struct kvm_vcpu *vcpu, int csrid, unsigned long val)
+{
+ unsigned long old = 0;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(csrid) & SW_GCSR) {
+ old = kvm_read_sw_gcsr(csr, csrid);
+ kvm_write_sw_gcsr(csr, csrid, val);
+ } else
+ pr_warn_once("Unsupported csrwr 0x%x with pc %lx\n", csrid, vcpu->arch.pc);
+
+ return old;
+}
+
+static unsigned long kvm_emu_xchg_csr(struct kvm_vcpu *vcpu, int csrid,
+ unsigned long csr_mask, unsigned long val)
+{
+ unsigned long old = 0;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(csrid) & SW_GCSR) {
+ old = kvm_read_sw_gcsr(csr, csrid);
+ val = (old & ~csr_mask) | (val & csr_mask);
+ kvm_write_sw_gcsr(csr, csrid, val);
+ old = old & csr_mask;
+ } else
+ pr_warn_once("Unsupported csrxchg 0x%x with pc %lx\n", csrid, vcpu->arch.pc);
+
+ return old;
+}
+
+static int kvm_handle_csr(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ unsigned int rd, rj, csrid;
+ unsigned long csr_mask, val = 0;
+
+ /*
+ * CSR value mask imm
+ * rj = 0 means csrrd
+ * rj = 1 means csrwr
+ * rj != 0,1 means csrxchg
+ */
+ rd = inst.reg2csr_format.rd;
+ rj = inst.reg2csr_format.rj;
+ csrid = inst.reg2csr_format.csr;
+
+ if (csrid >= LOONGARCH_CSR_PERFCTRL0 && csrid <= vcpu->arch.max_pmu_csrid) {
+ if (kvm_guest_has_pmu(&vcpu->arch)) {
+ vcpu->arch.pc -= 4;
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+ return EMULATE_DONE;
+ }
+ }
+
+ /* Process CSR ops */
+ switch (rj) {
+ case 0: /* process csrrd */
+ val = kvm_emu_read_csr(vcpu, csrid);
+ vcpu->arch.gprs[rd] = val;
+ break;
+ case 1: /* process csrwr */
+ val = vcpu->arch.gprs[rd];
+ val = kvm_emu_write_csr(vcpu, csrid, val);
+ vcpu->arch.gprs[rd] = val;
+ break;
+ default: /* process csrxchg */
+ val = vcpu->arch.gprs[rd];
+ csr_mask = vcpu->arch.gprs[rj];
+ val = kvm_emu_xchg_csr(vcpu, csrid, csr_mask, val);
+ vcpu->arch.gprs[rd] = val;
+ }
+
+ return EMULATE_DONE;
+}
+
+int kvm_emu_iocsr(larch_inst inst, struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int idx, ret;
+ unsigned long *val;
+ u32 addr, rd, rj, opcode;
+
+ /*
+ * Each IOCSR with different opcode
+ */
+ rd = inst.reg2_format.rd;
+ rj = inst.reg2_format.rj;
+ opcode = inst.reg2_format.opcode;
+ addr = vcpu->arch.gprs[rj];
+ run->iocsr_io.phys_addr = addr;
+ run->iocsr_io.is_write = 0;
+ val = &vcpu->arch.gprs[rd];
+
+ /* LoongArch is Little endian */
+ switch (opcode) {
+ case iocsrrdb_op:
+ run->iocsr_io.len = 1;
+ break;
+ case iocsrrdh_op:
+ run->iocsr_io.len = 2;
+ break;
+ case iocsrrdw_op:
+ run->iocsr_io.len = 4;
+ break;
+ case iocsrrdd_op:
+ run->iocsr_io.len = 8;
+ break;
+ case iocsrwrb_op:
+ run->iocsr_io.len = 1;
+ run->iocsr_io.is_write = 1;
+ break;
+ case iocsrwrh_op:
+ run->iocsr_io.len = 2;
+ run->iocsr_io.is_write = 1;
+ break;
+ case iocsrwrw_op:
+ run->iocsr_io.len = 4;
+ run->iocsr_io.is_write = 1;
+ break;
+ case iocsrwrd_op:
+ run->iocsr_io.len = 8;
+ run->iocsr_io.is_write = 1;
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+
+ if (run->iocsr_io.is_write) {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (ret == 0)
+ ret = EMULATE_DONE;
+ else {
+ ret = EMULATE_DO_IOCSR;
+ /* Save data and let user space to write it */
+ memcpy(run->iocsr_io.data, val, run->iocsr_io.len);
+ }
+ trace_kvm_iocsr(KVM_TRACE_IOCSR_WRITE, run->iocsr_io.len, addr, val);
+ } else {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, run->iocsr_io.len, val);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (ret == 0)
+ ret = EMULATE_DONE;
+ else {
+ ret = EMULATE_DO_IOCSR;
+ /* Save register id for iocsr read completion */
+ vcpu->arch.io_gpr = rd;
+ }
+ trace_kvm_iocsr(KVM_TRACE_IOCSR_READ, run->iocsr_io.len, addr, NULL);
+ }
+
+ return ret;
+}
+
+int kvm_complete_iocsr_read(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ enum emulation_result er = EMULATE_DONE;
+ unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
+
+ switch (run->iocsr_io.len) {
+ case 1:
+ *gpr = *(s8 *)run->iocsr_io.data;
+ break;
+ case 2:
+ *gpr = *(s16 *)run->iocsr_io.data;
+ break;
+ case 4:
+ *gpr = *(s32 *)run->iocsr_io.data;
+ break;
+ case 8:
+ *gpr = *(s64 *)run->iocsr_io.data;
+ break;
+ default:
+ kvm_err("Bad IOCSR length: %d, addr is 0x%lx\n",
+ run->iocsr_io.len, vcpu->arch.badv);
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ return er;
+}
+
+int kvm_emu_idle(struct kvm_vcpu *vcpu)
+{
+ ++vcpu->stat.idle_exits;
+ trace_kvm_exit_idle(vcpu, KVM_TRACE_EXIT_IDLE);
+
+ if (!kvm_arch_vcpu_runnable(vcpu))
+ kvm_vcpu_halt(vcpu);
+
+ return EMULATE_DONE;
+}
+
+static int kvm_trap_handle_gspr(struct kvm_vcpu *vcpu)
+{
+ unsigned long curr_pc;
+ larch_inst inst;
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_run *run = vcpu->run;
+
+ /* Fetch the instruction */
+ inst.word = vcpu->arch.badi;
+ curr_pc = vcpu->arch.pc;
+ update_pc(&vcpu->arch);
+
+ trace_kvm_exit_gspr(vcpu, inst.word);
+ er = EMULATE_FAIL;
+ switch (((inst.word >> 24) & 0xff)) {
+ case 0x0: /* CPUCFG GSPR */
+ er = kvm_emu_cpucfg(vcpu, inst);
+ break;
+ case 0x4: /* CSR{RD,WR,XCHG} GSPR */
+ er = kvm_handle_csr(vcpu, inst);
+ break;
+ case 0x6: /* Cache, Idle and IOCSR GSPR */
+ switch (((inst.word >> 22) & 0x3ff)) {
+ case 0x18: /* Cache GSPR */
+ er = EMULATE_DONE;
+ trace_kvm_exit_cache(vcpu, KVM_TRACE_EXIT_CACHE);
+ break;
+ case 0x19: /* Idle/IOCSR GSPR */
+ switch (((inst.word >> 15) & 0x1ffff)) {
+ case 0xc90: /* IOCSR GSPR */
+ er = kvm_emu_iocsr(inst, run, vcpu);
+ break;
+ case 0xc91: /* Idle GSPR */
+ er = kvm_emu_idle(vcpu);
+ break;
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ /* Rollback PC only if emulation was unsuccessful */
+ if (er == EMULATE_FAIL) {
+ kvm_err("[%#lx]%s: unsupported gspr instruction 0x%08x\n",
+ curr_pc, __func__, inst.word);
+
+ kvm_arch_vcpu_dump_regs(vcpu);
+ vcpu->arch.pc = curr_pc;
+ }
+
+ return er;
+}
+
+/*
+ * Trigger GSPR:
+ * 1) Execute CPUCFG instruction;
+ * 2) Execute CACOP/IDLE instructions;
+ * 3) Access to unimplemented CSRs/IOCSRs.
+ */
+static int kvm_handle_gspr(struct kvm_vcpu *vcpu, int ecode)
+{
+ int ret = RESUME_GUEST;
+ enum emulation_result er = EMULATE_DONE;
+
+ er = kvm_trap_handle_gspr(vcpu);
+
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else if (er == EMULATE_DO_MMIO) {
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ ret = RESUME_HOST;
+ } else if (er == EMULATE_DO_IOCSR) {
+ vcpu->run->exit_reason = KVM_EXIT_LOONGARCH_IOCSR;
+ ret = RESUME_HOST;
+ } else {
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+ ret = RESUME_GUEST;
+ }
+
+ return ret;
+}
+
+int kvm_emu_mmio_read(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ int idx, ret;
+ unsigned int op8, opcode, rd;
+ struct kvm_run *run = vcpu->run;
+
+ run->mmio.phys_addr = vcpu->arch.badv;
+ vcpu->mmio_needed = 2; /* signed */
+ op8 = (inst.word >> 24) & 0xff;
+ ret = EMULATE_DO_MMIO;
+
+ switch (op8) {
+ case 0x24 ... 0x27: /* ldptr.w/d process */
+ rd = inst.reg2i14_format.rd;
+ opcode = inst.reg2i14_format.opcode;
+
+ switch (opcode) {
+ case ldptrw_op:
+ run->mmio.len = 4;
+ break;
+ case ldptrd_op:
+ run->mmio.len = 8;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x28 ... 0x2e: /* ld.b/h/w/d, ld.bu/hu/wu process */
+ rd = inst.reg2i12_format.rd;
+ opcode = inst.reg2i12_format.opcode;
+
+ switch (opcode) {
+ case ldb_op:
+ run->mmio.len = 1;
+ break;
+ case ldbu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ run->mmio.len = 1;
+ break;
+ case ldh_op:
+ run->mmio.len = 2;
+ break;
+ case ldhu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ run->mmio.len = 2;
+ break;
+ case ldw_op:
+ run->mmio.len = 4;
+ break;
+ case ldwu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ run->mmio.len = 4;
+ break;
+ case ldd_op:
+ run->mmio.len = 8;
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ case 0x38: /* ldx.b/h/w/d, ldx.bu/hu/wu process */
+ rd = inst.reg3_format.rd;
+ opcode = inst.reg3_format.opcode;
+
+ switch (opcode) {
+ case ldxb_op:
+ run->mmio.len = 1;
+ break;
+ case ldxbu_op:
+ run->mmio.len = 1;
+ vcpu->mmio_needed = 1; /* unsigned */
+ break;
+ case ldxh_op:
+ run->mmio.len = 2;
+ break;
+ case ldxhu_op:
+ run->mmio.len = 2;
+ vcpu->mmio_needed = 1; /* unsigned */
+ break;
+ case ldxw_op:
+ run->mmio.len = 4;
+ break;
+ case ldxwu_op:
+ run->mmio.len = 4;
+ vcpu->mmio_needed = 1; /* unsigned */
+ break;
+ case ldxd_op:
+ run->mmio.len = 8;
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ }
+
+ if (ret == EMULATE_DO_MMIO) {
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len, run->mmio.phys_addr, NULL);
+
+ /*
+ * If mmio device such as PCH-PIC is emulated in KVM,
+ * it need not return to user space to handle the mmio
+ * exception.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, vcpu->arch.badv,
+ run->mmio.len, &vcpu->arch.gprs[rd]);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (!ret) {
+ update_pc(&vcpu->arch);
+ vcpu->mmio_needed = 0;
+ return EMULATE_DONE;
+ }
+
+ /* Set for kvm_complete_mmio_read() use */
+ vcpu->arch.io_gpr = rd;
+ run->mmio.is_write = 0;
+ vcpu->mmio_is_write = 0;
+ return EMULATE_DO_MMIO;
+ }
+
+ kvm_err("Read not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
+ inst.word, vcpu->arch.pc, vcpu->arch.badv);
+ kvm_arch_vcpu_dump_regs(vcpu);
+ vcpu->mmio_needed = 0;
+
+ return ret;
+}
+
+int kvm_complete_mmio_read(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ enum emulation_result er = EMULATE_DONE;
+ unsigned long *gpr = &vcpu->arch.gprs[vcpu->arch.io_gpr];
+
+ /* Update with new PC */
+ update_pc(&vcpu->arch);
+ switch (run->mmio.len) {
+ case 1:
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s8 *)run->mmio.data;
+ else
+ *gpr = *(u8 *)run->mmio.data;
+ break;
+ case 2:
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s16 *)run->mmio.data;
+ else
+ *gpr = *(u16 *)run->mmio.data;
+ break;
+ case 4:
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s32 *)run->mmio.data;
+ else
+ *gpr = *(u32 *)run->mmio.data;
+ break;
+ case 8:
+ *gpr = *(s64 *)run->mmio.data;
+ break;
+ default:
+ kvm_err("Bad MMIO length: %d, addr is 0x%lx\n",
+ run->mmio.len, vcpu->arch.badv);
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, run->mmio.len,
+ run->mmio.phys_addr, run->mmio.data);
+
+ return er;
+}
+
+int kvm_emu_mmio_write(struct kvm_vcpu *vcpu, larch_inst inst)
+{
+ int idx, ret;
+ unsigned int rd, op8, opcode;
+ unsigned long curr_pc, rd_val = 0;
+ struct kvm_run *run = vcpu->run;
+ void *data = run->mmio.data;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+ * an error and we want to rollback the PC
+ */
+ curr_pc = vcpu->arch.pc;
+ update_pc(&vcpu->arch);
+
+ op8 = (inst.word >> 24) & 0xff;
+ run->mmio.phys_addr = vcpu->arch.badv;
+ ret = EMULATE_DO_MMIO;
+ switch (op8) {
+ case 0x24 ... 0x27: /* stptr.w/d process */
+ rd = inst.reg2i14_format.rd;
+ opcode = inst.reg2i14_format.opcode;
+
+ switch (opcode) {
+ case stptrw_op:
+ run->mmio.len = 4;
+ *(unsigned int *)data = vcpu->arch.gprs[rd];
+ break;
+ case stptrd_op:
+ run->mmio.len = 8;
+ *(unsigned long *)data = vcpu->arch.gprs[rd];
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ case 0x28 ... 0x2e: /* st.b/h/w/d process */
+ rd = inst.reg2i12_format.rd;
+ opcode = inst.reg2i12_format.opcode;
+ rd_val = vcpu->arch.gprs[rd];
+
+ switch (opcode) {
+ case stb_op:
+ run->mmio.len = 1;
+ *(unsigned char *)data = rd_val;
+ break;
+ case sth_op:
+ run->mmio.len = 2;
+ *(unsigned short *)data = rd_val;
+ break;
+ case stw_op:
+ run->mmio.len = 4;
+ *(unsigned int *)data = rd_val;
+ break;
+ case std_op:
+ run->mmio.len = 8;
+ *(unsigned long *)data = rd_val;
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ case 0x38: /* stx.b/h/w/d process */
+ rd = inst.reg3_format.rd;
+ opcode = inst.reg3_format.opcode;
+
+ switch (opcode) {
+ case stxb_op:
+ run->mmio.len = 1;
+ *(unsigned char *)data = vcpu->arch.gprs[rd];
+ break;
+ case stxh_op:
+ run->mmio.len = 2;
+ *(unsigned short *)data = vcpu->arch.gprs[rd];
+ break;
+ case stxw_op:
+ run->mmio.len = 4;
+ *(unsigned int *)data = vcpu->arch.gprs[rd];
+ break;
+ case stxd_op:
+ run->mmio.len = 8;
+ *(unsigned long *)data = vcpu->arch.gprs[rd];
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ break;
+ }
+ break;
+ default:
+ ret = EMULATE_FAIL;
+ }
+
+ if (ret == EMULATE_DO_MMIO) {
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, run->mmio.len, run->mmio.phys_addr, data);
+
+ /*
+ * If mmio device such as PCH-PIC is emulated in KVM,
+ * it need not return to user space to handle the mmio
+ * exception.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, vcpu->arch.badv, run->mmio.len, data);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (!ret)
+ return EMULATE_DONE;
+
+ run->mmio.is_write = 1;
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_is_write = 1;
+ return EMULATE_DO_MMIO;
+ }
+
+ vcpu->arch.pc = curr_pc;
+ kvm_err("Write not supported Inst=0x%08x @%lx BadVaddr:%#lx\n",
+ inst.word, vcpu->arch.pc, vcpu->arch.badv);
+ kvm_arch_vcpu_dump_regs(vcpu);
+ /* Rollback PC if emulation was unsuccessful */
+
+ return ret;
+}
+
+static int kvm_handle_rdwr_fault(struct kvm_vcpu *vcpu, bool write, int ecode)
+{
+ int ret;
+ larch_inst inst;
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_run *run = vcpu->run;
+ unsigned long badv = vcpu->arch.badv;
+
+ /* Inject ADE exception if exceed max GPA size */
+ if (unlikely(badv >= vcpu->kvm->arch.gpa_size)) {
+ kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM);
+ return RESUME_GUEST;
+ }
+
+ ret = kvm_handle_mm_fault(vcpu, badv, write, ecode);
+ if (ret) {
+ /* Treat as MMIO */
+ inst.word = vcpu->arch.badi;
+ if (write) {
+ er = kvm_emu_mmio_write(vcpu, inst);
+ } else {
+ /* A code fetch fault doesn't count as an MMIO */
+ if (kvm_is_ifetch_fault(&vcpu->arch)) {
+ kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEF);
+ return RESUME_GUEST;
+ }
+
+ er = kvm_emu_mmio_read(vcpu, inst);
+ }
+ }
+
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else if (er == EMULATE_DO_MMIO) {
+ run->exit_reason = KVM_EXIT_MMIO;
+ ret = RESUME_HOST;
+ } else {
+ kvm_queue_exception(vcpu, EXCCODE_ADE, EXSUBCODE_ADEM);
+ ret = RESUME_GUEST;
+ }
+
+ return ret;
+}
+
+static int kvm_handle_read_fault(struct kvm_vcpu *vcpu, int ecode)
+{
+ return kvm_handle_rdwr_fault(vcpu, false, ecode);
+}
+
+static int kvm_handle_write_fault(struct kvm_vcpu *vcpu, int ecode)
+{
+ return kvm_handle_rdwr_fault(vcpu, true, ecode);
+}
+
+int kvm_complete_user_service(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ update_pc(&vcpu->arch);
+ kvm_write_reg(vcpu, LOONGARCH_GPR_A0, run->hypercall.ret);
+
+ return 0;
+}
+
+/**
+ * kvm_handle_fpu_disabled() - Guest used fpu however it is disabled at host
+ * @vcpu: Virtual CPU context.
+ * @ecode: Exception code.
+ *
+ * Handle when the guest attempts to use fpu which hasn't been allowed
+ * by the root context.
+ */
+static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu, int ecode)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (!kvm_guest_has_fpu(&vcpu->arch)) {
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+ return RESUME_GUEST;
+ }
+
+ /*
+ * If guest FPU not present, the FPU operation should have been
+ * treated as a reserved instruction!
+ * If FPU already in use, we shouldn't get this at all.
+ */
+ if (WARN_ON(vcpu->arch.aux_inuse & KVM_LARCH_FPU)) {
+ kvm_err("%s internal error\n", __func__);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
+
+ kvm_own_fpu(vcpu);
+
+ return RESUME_GUEST;
+}
+
+static long kvm_save_notify(struct kvm_vcpu *vcpu)
+{
+ unsigned long id, data;
+
+ id = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
+ data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
+ switch (id) {
+ case BIT(KVM_FEATURE_STEAL_TIME):
+ if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
+ return KVM_HCALL_INVALID_PARAMETER;
+
+ vcpu->arch.st.guest_addr = data;
+ if (!(data & KVM_STEAL_PHYS_VALID))
+ return 0;
+
+ vcpu->arch.st.last_steal = current->sched_info.run_delay;
+ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+ return 0;
+ default:
+ return KVM_HCALL_INVALID_CODE;
+ };
+
+ return KVM_HCALL_INVALID_CODE;
+};
+
+/*
+ * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
+ * @vcpu: Virtual CPU context.
+ * @ecode: Exception code.
+ *
+ * Handle when the guest attempts to use LSX when it is disabled in the root
+ * context.
+ */
+static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu, int ecode)
+{
+ if (kvm_own_lsx(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+/*
+ * kvm_handle_lasx_disabled() - Guest used LASX while disabled in root.
+ * @vcpu: Virtual CPU context.
+ * @ecode: Exception code.
+ *
+ * Handle when the guest attempts to use LASX when it is disabled in the root
+ * context.
+ */
+static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu, int ecode)
+{
+ if (kvm_own_lasx(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+static int kvm_handle_lbt_disabled(struct kvm_vcpu *vcpu, int ecode)
+{
+ if (kvm_own_lbt(vcpu))
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+static int kvm_send_pv_ipi(struct kvm_vcpu *vcpu)
+{
+ unsigned int min, cpu, i;
+ unsigned long ipi_bitmap;
+ struct kvm_vcpu *dest;
+
+ min = kvm_read_reg(vcpu, LOONGARCH_GPR_A3);
+ for (i = 0; i < 2; i++, min += BITS_PER_LONG) {
+ ipi_bitmap = kvm_read_reg(vcpu, LOONGARCH_GPR_A1 + i);
+ if (!ipi_bitmap)
+ continue;
+
+ cpu = find_first_bit((void *)&ipi_bitmap, BITS_PER_LONG);
+ while (cpu < BITS_PER_LONG) {
+ dest = kvm_get_vcpu_by_cpuid(vcpu->kvm, cpu + min);
+ cpu = find_next_bit((void *)&ipi_bitmap, BITS_PER_LONG, cpu + 1);
+ if (!dest)
+ continue;
+
+ /* Send SWI0 to dest vcpu to emulate IPI interrupt */
+ kvm_queue_irq(dest, INT_SWI0);
+ kvm_vcpu_kick(dest);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Hypercall emulation always return to guest, Caller should check retval.
+ */
+static void kvm_handle_service(struct kvm_vcpu *vcpu)
+{
+ long ret = KVM_HCALL_INVALID_CODE;
+ unsigned long func = kvm_read_reg(vcpu, LOONGARCH_GPR_A0);
+
+ switch (func) {
+ case KVM_HCALL_FUNC_IPI:
+ if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_IPI)) {
+ kvm_send_pv_ipi(vcpu);
+ ret = KVM_HCALL_SUCCESS;
+ }
+ break;
+ case KVM_HCALL_FUNC_NOTIFY:
+ if (kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME))
+ ret = kvm_save_notify(vcpu);
+ break;
+ default:
+ break;
+ }
+
+ kvm_write_reg(vcpu, LOONGARCH_GPR_A0, ret);
+}
+
+static int kvm_handle_hypercall(struct kvm_vcpu *vcpu, int ecode)
+{
+ int ret;
+ larch_inst inst;
+ unsigned int code;
+
+ inst.word = vcpu->arch.badi;
+ code = inst.reg0i15_format.immediate;
+ ret = RESUME_GUEST;
+
+ switch (code) {
+ case KVM_HCALL_SERVICE:
+ vcpu->stat.hypercall_exits++;
+ kvm_handle_service(vcpu);
+ break;
+ case KVM_HCALL_USER_SERVICE:
+ if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_USER_HCALL)) {
+ kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE);
+ break;
+ }
+
+ vcpu->stat.hypercall_exits++;
+ vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
+ vcpu->run->hypercall.nr = KVM_HCALL_USER_SERVICE;
+ vcpu->run->hypercall.args[0] = kvm_read_reg(vcpu, LOONGARCH_GPR_A0);
+ vcpu->run->hypercall.args[1] = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
+ vcpu->run->hypercall.args[2] = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
+ vcpu->run->hypercall.args[3] = kvm_read_reg(vcpu, LOONGARCH_GPR_A3);
+ vcpu->run->hypercall.args[4] = kvm_read_reg(vcpu, LOONGARCH_GPR_A4);
+ vcpu->run->hypercall.args[5] = kvm_read_reg(vcpu, LOONGARCH_GPR_A5);
+ vcpu->run->hypercall.flags = 0;
+ /*
+ * Set invalid return value by default, let user-mode VMM modify it.
+ */
+ vcpu->run->hypercall.ret = KVM_HCALL_INVALID_CODE;
+ ret = RESUME_HOST;
+ break;
+ case KVM_HCALL_SWDBG:
+ /* KVM_HCALL_SWDBG only in effective when SW_BP is enabled */
+ if (vcpu->guest_debug & KVM_GUESTDBG_SW_BP_MASK) {
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ ret = RESUME_HOST;
+ break;
+ }
+ fallthrough;
+ default:
+ /* Treat it as noop intruction, only set return value */
+ kvm_write_reg(vcpu, LOONGARCH_GPR_A0, KVM_HCALL_INVALID_CODE);
+ break;
+ }
+
+ if (ret == RESUME_GUEST)
+ update_pc(&vcpu->arch);
+
+ return ret;
+}
+
+/*
+ * LoongArch KVM callback handling for unimplemented guest exiting
+ */
+static int kvm_fault_ni(struct kvm_vcpu *vcpu, int ecode)
+{
+ unsigned int inst;
+ unsigned long badv;
+
+ /* Fetch the instruction */
+ inst = vcpu->arch.badi;
+ badv = vcpu->arch.badv;
+ kvm_err("ECode: %d PC=%#lx Inst=0x%08x BadVaddr=%#lx ESTAT=%#lx\n",
+ ecode, vcpu->arch.pc, inst, badv, read_gcsr_estat());
+ kvm_arch_vcpu_dump_regs(vcpu);
+ kvm_queue_exception(vcpu, EXCCODE_INE, 0);
+
+ return RESUME_GUEST;
+}
+
+static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = {
+ [0 ... EXCCODE_INT_START - 1] = kvm_fault_ni,
+ [EXCCODE_TLBI] = kvm_handle_read_fault,
+ [EXCCODE_TLBL] = kvm_handle_read_fault,
+ [EXCCODE_TLBS] = kvm_handle_write_fault,
+ [EXCCODE_TLBM] = kvm_handle_write_fault,
+ [EXCCODE_FPDIS] = kvm_handle_fpu_disabled,
+ [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled,
+ [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled,
+ [EXCCODE_BTDIS] = kvm_handle_lbt_disabled,
+ [EXCCODE_GSPR] = kvm_handle_gspr,
+ [EXCCODE_HVC] = kvm_handle_hypercall,
+};
+
+int kvm_handle_fault(struct kvm_vcpu *vcpu, int fault)
+{
+ return kvm_fault_tables[fault](vcpu, fault);
+}
diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c
new file mode 100644
index 000000000000..f39929d7bf8a
--- /dev/null
+++ b/arch/loongarch/kvm/intc/eiointc.c
@@ -0,0 +1,1027 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ */
+
+#include <asm/kvm_eiointc.h>
+#include <asm/kvm_vcpu.h>
+#include <linux/count_zeros.h>
+
+static void eiointc_set_sw_coreisr(struct loongarch_eiointc *s)
+{
+ int ipnum, cpu, irq_index, irq_mask, irq;
+
+ for (irq = 0; irq < EIOINTC_IRQS; irq++) {
+ ipnum = s->ipmap.reg_u8[irq / 32];
+ if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) {
+ ipnum = count_trailing_zeros(ipnum);
+ ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0;
+ }
+ irq_index = irq / 32;
+ irq_mask = BIT(irq & 0x1f);
+
+ cpu = s->coremap.reg_u8[irq];
+ if (!!(s->coreisr.reg_u32[cpu][irq_index] & irq_mask))
+ set_bit(irq, s->sw_coreisr[cpu][ipnum]);
+ else
+ clear_bit(irq, s->sw_coreisr[cpu][ipnum]);
+ }
+}
+
+static void eiointc_update_irq(struct loongarch_eiointc *s, int irq, int level)
+{
+ int ipnum, cpu, found, irq_index, irq_mask;
+ struct kvm_vcpu *vcpu;
+ struct kvm_interrupt vcpu_irq;
+
+ ipnum = s->ipmap.reg_u8[irq / 32];
+ if (!(s->status & BIT(EIOINTC_ENABLE_INT_ENCODE))) {
+ ipnum = count_trailing_zeros(ipnum);
+ ipnum = (ipnum >= 0 && ipnum < 4) ? ipnum : 0;
+ }
+
+ cpu = s->sw_coremap[irq];
+ vcpu = kvm_get_vcpu(s->kvm, cpu);
+ irq_index = irq / 32;
+ irq_mask = BIT(irq & 0x1f);
+
+ if (level) {
+ /* if not enable return false */
+ if (((s->enable.reg_u32[irq_index]) & irq_mask) == 0)
+ return;
+ s->coreisr.reg_u32[cpu][irq_index] |= irq_mask;
+ found = find_first_bit(s->sw_coreisr[cpu][ipnum], EIOINTC_IRQS);
+ set_bit(irq, s->sw_coreisr[cpu][ipnum]);
+ } else {
+ s->coreisr.reg_u32[cpu][irq_index] &= ~irq_mask;
+ clear_bit(irq, s->sw_coreisr[cpu][ipnum]);
+ found = find_first_bit(s->sw_coreisr[cpu][ipnum], EIOINTC_IRQS);
+ }
+
+ if (found < EIOINTC_IRQS)
+ return; /* other irq is handling, needn't update parent irq */
+
+ vcpu_irq.irq = level ? (INT_HWI0 + ipnum) : -(INT_HWI0 + ipnum);
+ kvm_vcpu_ioctl_interrupt(vcpu, &vcpu_irq);
+}
+
+static inline void eiointc_update_sw_coremap(struct loongarch_eiointc *s,
+ int irq, void *pvalue, u32 len, bool notify)
+{
+ int i, cpu;
+ u64 val = *(u64 *)pvalue;
+
+ for (i = 0; i < len; i++) {
+ cpu = val & 0xff;
+ val = val >> 8;
+
+ if (!(s->status & BIT(EIOINTC_ENABLE_CPU_ENCODE))) {
+ cpu = ffs(cpu) - 1;
+ cpu = (cpu >= 4) ? 0 : cpu;
+ }
+
+ if (s->sw_coremap[irq + i] == cpu)
+ continue;
+
+ if (notify && test_bit(irq + i, (unsigned long *)s->isr.reg_u8)) {
+ /* lower irq at old cpu and raise irq at new cpu */
+ eiointc_update_irq(s, irq + i, 0);
+ s->sw_coremap[irq + i] = cpu;
+ eiointc_update_irq(s, irq + i, 1);
+ } else {
+ s->sw_coremap[irq + i] = cpu;
+ }
+ }
+}
+
+void eiointc_set_irq(struct loongarch_eiointc *s, int irq, int level)
+{
+ unsigned long flags;
+ unsigned long *isr = (unsigned long *)s->isr.reg_u8;
+
+ level ? set_bit(irq, isr) : clear_bit(irq, isr);
+ spin_lock_irqsave(&s->lock, flags);
+ eiointc_update_irq(s, irq, level);
+ spin_unlock_irqrestore(&s->lock, flags);
+}
+
+static inline void eiointc_enable_irq(struct kvm_vcpu *vcpu,
+ struct loongarch_eiointc *s, int index, u8 mask, int level)
+{
+ u8 val;
+ int irq;
+
+ val = mask & s->isr.reg_u8[index];
+ irq = ffs(val);
+ while (irq != 0) {
+ /*
+ * enable bit change from 0 to 1,
+ * need to update irq by pending bits
+ */
+ eiointc_update_irq(s, irq - 1 + index * 8, level);
+ val &= ~BIT(irq - 1);
+ irq = ffs(val);
+ }
+}
+
+static int loongarch_eiointc_readb(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s,
+ gpa_t addr, int len, void *val)
+{
+ int index, ret = 0;
+ u8 data = 0;
+ gpa_t offset;
+
+ offset = addr - EIOINTC_BASE;
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = offset - EIOINTC_NODETYPE_START;
+ data = s->nodetype.reg_u8[index];
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ index = offset - EIOINTC_IPMAP_START;
+ data = s->ipmap.reg_u8[index];
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = offset - EIOINTC_ENABLE_START;
+ data = s->enable.reg_u8[index];
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ index = offset - EIOINTC_BOUNCE_START;
+ data = s->bounce.reg_u8[index];
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = offset - EIOINTC_COREISR_START;
+ data = s->coreisr.reg_u8[vcpu->vcpu_id][index];
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ index = offset - EIOINTC_COREMAP_START;
+ data = s->coremap.reg_u8[index];
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ *(u8 *)val = data;
+
+ return ret;
+}
+
+static int loongarch_eiointc_readw(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s,
+ gpa_t addr, int len, void *val)
+{
+ int index, ret = 0;
+ u16 data = 0;
+ gpa_t offset;
+
+ offset = addr - EIOINTC_BASE;
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = (offset - EIOINTC_NODETYPE_START) >> 1;
+ data = s->nodetype.reg_u16[index];
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ index = (offset - EIOINTC_IPMAP_START) >> 1;
+ data = s->ipmap.reg_u16[index];
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = (offset - EIOINTC_ENABLE_START) >> 1;
+ data = s->enable.reg_u16[index];
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ index = (offset - EIOINTC_BOUNCE_START) >> 1;
+ data = s->bounce.reg_u16[index];
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = (offset - EIOINTC_COREISR_START) >> 1;
+ data = s->coreisr.reg_u16[vcpu->vcpu_id][index];
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ index = (offset - EIOINTC_COREMAP_START) >> 1;
+ data = s->coremap.reg_u16[index];
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ *(u16 *)val = data;
+
+ return ret;
+}
+
+static int loongarch_eiointc_readl(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s,
+ gpa_t addr, int len, void *val)
+{
+ int index, ret = 0;
+ u32 data = 0;
+ gpa_t offset;
+
+ offset = addr - EIOINTC_BASE;
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = (offset - EIOINTC_NODETYPE_START) >> 2;
+ data = s->nodetype.reg_u32[index];
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ index = (offset - EIOINTC_IPMAP_START) >> 2;
+ data = s->ipmap.reg_u32[index];
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = (offset - EIOINTC_ENABLE_START) >> 2;
+ data = s->enable.reg_u32[index];
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ index = (offset - EIOINTC_BOUNCE_START) >> 2;
+ data = s->bounce.reg_u32[index];
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = (offset - EIOINTC_COREISR_START) >> 2;
+ data = s->coreisr.reg_u32[vcpu->vcpu_id][index];
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ index = (offset - EIOINTC_COREMAP_START) >> 2;
+ data = s->coremap.reg_u32[index];
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ *(u32 *)val = data;
+
+ return ret;
+}
+
+static int loongarch_eiointc_readq(struct kvm_vcpu *vcpu, struct loongarch_eiointc *s,
+ gpa_t addr, int len, void *val)
+{
+ int index, ret = 0;
+ u64 data = 0;
+ gpa_t offset;
+
+ offset = addr - EIOINTC_BASE;
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = (offset - EIOINTC_NODETYPE_START) >> 3;
+ data = s->nodetype.reg_u64[index];
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ index = (offset - EIOINTC_IPMAP_START) >> 3;
+ data = s->ipmap.reg_u64;
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = (offset - EIOINTC_ENABLE_START) >> 3;
+ data = s->enable.reg_u64[index];
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ index = (offset - EIOINTC_BOUNCE_START) >> 3;
+ data = s->bounce.reg_u64[index];
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = (offset - EIOINTC_COREISR_START) >> 3;
+ data = s->coreisr.reg_u64[vcpu->vcpu_id][index];
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ index = (offset - EIOINTC_COREMAP_START) >> 3;
+ data = s->coremap.reg_u64[index];
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ *(u64 *)val = data;
+
+ return ret;
+}
+
+static int kvm_eiointc_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ int ret = -EINVAL;
+ unsigned long flags;
+ struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc;
+
+ if (!eiointc) {
+ kvm_err("%s: eiointc irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+
+ vcpu->kvm->stat.eiointc_read_exits++;
+ spin_lock_irqsave(&eiointc->lock, flags);
+ switch (len) {
+ case 1:
+ ret = loongarch_eiointc_readb(vcpu, eiointc, addr, len, val);
+ break;
+ case 2:
+ ret = loongarch_eiointc_readw(vcpu, eiointc, addr, len, val);
+ break;
+ case 4:
+ ret = loongarch_eiointc_readl(vcpu, eiointc, addr, len, val);
+ break;
+ case 8:
+ ret = loongarch_eiointc_readq(vcpu, eiointc, addr, len, val);
+ break;
+ default:
+ WARN_ONCE(1, "%s: Abnormal address access: addr 0x%llx, size %d\n",
+ __func__, addr, len);
+ }
+ spin_unlock_irqrestore(&eiointc->lock, flags);
+
+ return ret;
+}
+
+static int loongarch_eiointc_writeb(struct kvm_vcpu *vcpu,
+ struct loongarch_eiointc *s,
+ gpa_t addr, int len, const void *val)
+{
+ int index, irq, bits, ret = 0;
+ u8 cpu;
+ u8 data, old_data;
+ u8 coreisr, old_coreisr;
+ gpa_t offset;
+
+ data = *(u8 *)val;
+ offset = addr - EIOINTC_BASE;
+
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = (offset - EIOINTC_NODETYPE_START);
+ s->nodetype.reg_u8[index] = data;
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ /*
+ * ipmap cannot be set at runtime, can be set only at the beginning
+ * of irqchip driver, need not update upper irq level
+ */
+ index = (offset - EIOINTC_IPMAP_START);
+ s->ipmap.reg_u8[index] = data;
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = (offset - EIOINTC_ENABLE_START);
+ old_data = s->enable.reg_u8[index];
+ s->enable.reg_u8[index] = data;
+ /*
+ * 1: enable irq.
+ * update irq when isr is set.
+ */
+ data = s->enable.reg_u8[index] & ~old_data & s->isr.reg_u8[index];
+ eiointc_enable_irq(vcpu, s, index, data, 1);
+ /*
+ * 0: disable irq.
+ * update irq when isr is set.
+ */
+ data = ~s->enable.reg_u8[index] & old_data & s->isr.reg_u8[index];
+ eiointc_enable_irq(vcpu, s, index, data, 0);
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ /* do not emulate hw bounced irq routing */
+ index = offset - EIOINTC_BOUNCE_START;
+ s->bounce.reg_u8[index] = data;
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = (offset - EIOINTC_COREISR_START);
+ /* use attrs to get current cpu index */
+ cpu = vcpu->vcpu_id;
+ coreisr = data;
+ old_coreisr = s->coreisr.reg_u8[cpu][index];
+ /* write 1 to clear interrupt */
+ s->coreisr.reg_u8[cpu][index] = old_coreisr & ~coreisr;
+ coreisr &= old_coreisr;
+ bits = sizeof(data) * 8;
+ irq = find_first_bit((void *)&coreisr, bits);
+ while (irq < bits) {
+ eiointc_update_irq(s, irq + index * bits, 0);
+ bitmap_clear((void *)&coreisr, irq, 1);
+ irq = find_first_bit((void *)&coreisr, bits);
+ }
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ irq = offset - EIOINTC_COREMAP_START;
+ index = irq;
+ s->coremap.reg_u8[index] = data;
+ eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int loongarch_eiointc_writew(struct kvm_vcpu *vcpu,
+ struct loongarch_eiointc *s,
+ gpa_t addr, int len, const void *val)
+{
+ int i, index, irq, bits, ret = 0;
+ u8 cpu;
+ u16 data, old_data;
+ u16 coreisr, old_coreisr;
+ gpa_t offset;
+
+ data = *(u16 *)val;
+ offset = addr - EIOINTC_BASE;
+
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = (offset - EIOINTC_NODETYPE_START) >> 1;
+ s->nodetype.reg_u16[index] = data;
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ /*
+ * ipmap cannot be set at runtime, can be set only at the beginning
+ * of irqchip driver, need not update upper irq level
+ */
+ index = (offset - EIOINTC_IPMAP_START) >> 1;
+ s->ipmap.reg_u16[index] = data;
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = (offset - EIOINTC_ENABLE_START) >> 1;
+ old_data = s->enable.reg_u32[index];
+ s->enable.reg_u16[index] = data;
+ /*
+ * 1: enable irq.
+ * update irq when isr is set.
+ */
+ data = s->enable.reg_u16[index] & ~old_data & s->isr.reg_u16[index];
+ index = index << 1;
+ for (i = 0; i < sizeof(data); i++) {
+ u8 mask = (data >> (i * 8)) & 0xff;
+ eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ }
+ /*
+ * 0: disable irq.
+ * update irq when isr is set.
+ */
+ data = ~s->enable.reg_u16[index] & old_data & s->isr.reg_u16[index];
+ for (i = 0; i < sizeof(data); i++) {
+ u8 mask = (data >> (i * 8)) & 0xff;
+ eiointc_enable_irq(vcpu, s, index, mask, 0);
+ }
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ /* do not emulate hw bounced irq routing */
+ index = (offset - EIOINTC_BOUNCE_START) >> 1;
+ s->bounce.reg_u16[index] = data;
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = (offset - EIOINTC_COREISR_START) >> 1;
+ /* use attrs to get current cpu index */
+ cpu = vcpu->vcpu_id;
+ coreisr = data;
+ old_coreisr = s->coreisr.reg_u16[cpu][index];
+ /* write 1 to clear interrupt */
+ s->coreisr.reg_u16[cpu][index] = old_coreisr & ~coreisr;
+ coreisr &= old_coreisr;
+ bits = sizeof(data) * 8;
+ irq = find_first_bit((void *)&coreisr, bits);
+ while (irq < bits) {
+ eiointc_update_irq(s, irq + index * bits, 0);
+ bitmap_clear((void *)&coreisr, irq, 1);
+ irq = find_first_bit((void *)&coreisr, bits);
+ }
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ irq = offset - EIOINTC_COREMAP_START;
+ index = irq >> 1;
+ s->coremap.reg_u16[index] = data;
+ eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int loongarch_eiointc_writel(struct kvm_vcpu *vcpu,
+ struct loongarch_eiointc *s,
+ gpa_t addr, int len, const void *val)
+{
+ int i, index, irq, bits, ret = 0;
+ u8 cpu;
+ u32 data, old_data;
+ u32 coreisr, old_coreisr;
+ gpa_t offset;
+
+ data = *(u32 *)val;
+ offset = addr - EIOINTC_BASE;
+
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = (offset - EIOINTC_NODETYPE_START) >> 2;
+ s->nodetype.reg_u32[index] = data;
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ /*
+ * ipmap cannot be set at runtime, can be set only at the beginning
+ * of irqchip driver, need not update upper irq level
+ */
+ index = (offset - EIOINTC_IPMAP_START) >> 2;
+ s->ipmap.reg_u32[index] = data;
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = (offset - EIOINTC_ENABLE_START) >> 2;
+ old_data = s->enable.reg_u32[index];
+ s->enable.reg_u32[index] = data;
+ /*
+ * 1: enable irq.
+ * update irq when isr is set.
+ */
+ data = s->enable.reg_u32[index] & ~old_data & s->isr.reg_u32[index];
+ index = index << 2;
+ for (i = 0; i < sizeof(data); i++) {
+ u8 mask = (data >> (i * 8)) & 0xff;
+ eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ }
+ /*
+ * 0: disable irq.
+ * update irq when isr is set.
+ */
+ data = ~s->enable.reg_u32[index] & old_data & s->isr.reg_u32[index];
+ for (i = 0; i < sizeof(data); i++) {
+ u8 mask = (data >> (i * 8)) & 0xff;
+ eiointc_enable_irq(vcpu, s, index, mask, 0);
+ }
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ /* do not emulate hw bounced irq routing */
+ index = (offset - EIOINTC_BOUNCE_START) >> 2;
+ s->bounce.reg_u32[index] = data;
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = (offset - EIOINTC_COREISR_START) >> 2;
+ /* use attrs to get current cpu index */
+ cpu = vcpu->vcpu_id;
+ coreisr = data;
+ old_coreisr = s->coreisr.reg_u32[cpu][index];
+ /* write 1 to clear interrupt */
+ s->coreisr.reg_u32[cpu][index] = old_coreisr & ~coreisr;
+ coreisr &= old_coreisr;
+ bits = sizeof(data) * 8;
+ irq = find_first_bit((void *)&coreisr, bits);
+ while (irq < bits) {
+ eiointc_update_irq(s, irq + index * bits, 0);
+ bitmap_clear((void *)&coreisr, irq, 1);
+ irq = find_first_bit((void *)&coreisr, bits);
+ }
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ irq = offset - EIOINTC_COREMAP_START;
+ index = irq >> 2;
+ s->coremap.reg_u32[index] = data;
+ eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int loongarch_eiointc_writeq(struct kvm_vcpu *vcpu,
+ struct loongarch_eiointc *s,
+ gpa_t addr, int len, const void *val)
+{
+ int i, index, irq, bits, ret = 0;
+ u8 cpu;
+ u64 data, old_data;
+ u64 coreisr, old_coreisr;
+ gpa_t offset;
+
+ data = *(u64 *)val;
+ offset = addr - EIOINTC_BASE;
+
+ switch (offset) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ index = (offset - EIOINTC_NODETYPE_START) >> 3;
+ s->nodetype.reg_u64[index] = data;
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ /*
+ * ipmap cannot be set at runtime, can be set only at the beginning
+ * of irqchip driver, need not update upper irq level
+ */
+ index = (offset - EIOINTC_IPMAP_START) >> 3;
+ s->ipmap.reg_u64 = data;
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ index = (offset - EIOINTC_ENABLE_START) >> 3;
+ old_data = s->enable.reg_u64[index];
+ s->enable.reg_u64[index] = data;
+ /*
+ * 1: enable irq.
+ * update irq when isr is set.
+ */
+ data = s->enable.reg_u64[index] & ~old_data & s->isr.reg_u64[index];
+ index = index << 3;
+ for (i = 0; i < sizeof(data); i++) {
+ u8 mask = (data >> (i * 8)) & 0xff;
+ eiointc_enable_irq(vcpu, s, index + i, mask, 1);
+ }
+ /*
+ * 0: disable irq.
+ * update irq when isr is set.
+ */
+ data = ~s->enable.reg_u64[index] & old_data & s->isr.reg_u64[index];
+ for (i = 0; i < sizeof(data); i++) {
+ u8 mask = (data >> (i * 8)) & 0xff;
+ eiointc_enable_irq(vcpu, s, index, mask, 0);
+ }
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ /* do not emulate hw bounced irq routing */
+ index = (offset - EIOINTC_BOUNCE_START) >> 3;
+ s->bounce.reg_u64[index] = data;
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ index = (offset - EIOINTC_COREISR_START) >> 3;
+ /* use attrs to get current cpu index */
+ cpu = vcpu->vcpu_id;
+ coreisr = data;
+ old_coreisr = s->coreisr.reg_u64[cpu][index];
+ /* write 1 to clear interrupt */
+ s->coreisr.reg_u64[cpu][index] = old_coreisr & ~coreisr;
+ coreisr &= old_coreisr;
+ bits = sizeof(data) * 8;
+ irq = find_first_bit((void *)&coreisr, bits);
+ while (irq < bits) {
+ eiointc_update_irq(s, irq + index * bits, 0);
+ bitmap_clear((void *)&coreisr, irq, 1);
+ irq = find_first_bit((void *)&coreisr, bits);
+ }
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ irq = offset - EIOINTC_COREMAP_START;
+ index = irq >> 3;
+ s->coremap.reg_u64[index] = data;
+ eiointc_update_sw_coremap(s, irq, (void *)&data, sizeof(data), true);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_eiointc_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ int ret = -EINVAL;
+ unsigned long flags;
+ struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc;
+
+ if (!eiointc) {
+ kvm_err("%s: eiointc irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+
+ vcpu->kvm->stat.eiointc_write_exits++;
+ spin_lock_irqsave(&eiointc->lock, flags);
+ switch (len) {
+ case 1:
+ ret = loongarch_eiointc_writeb(vcpu, eiointc, addr, len, val);
+ break;
+ case 2:
+ ret = loongarch_eiointc_writew(vcpu, eiointc, addr, len, val);
+ break;
+ case 4:
+ ret = loongarch_eiointc_writel(vcpu, eiointc, addr, len, val);
+ break;
+ case 8:
+ ret = loongarch_eiointc_writeq(vcpu, eiointc, addr, len, val);
+ break;
+ default:
+ WARN_ONCE(1, "%s: Abnormal address access: addr 0x%llx, size %d\n",
+ __func__, addr, len);
+ }
+ spin_unlock_irqrestore(&eiointc->lock, flags);
+
+ return ret;
+}
+
+static const struct kvm_io_device_ops kvm_eiointc_ops = {
+ .read = kvm_eiointc_read,
+ .write = kvm_eiointc_write,
+};
+
+static int kvm_eiointc_virt_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ unsigned long flags;
+ u32 *data = val;
+ struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc;
+
+ if (!eiointc) {
+ kvm_err("%s: eiointc irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+
+ addr -= EIOINTC_VIRT_BASE;
+ spin_lock_irqsave(&eiointc->lock, flags);
+ switch (addr) {
+ case EIOINTC_VIRT_FEATURES:
+ *data = eiointc->features;
+ break;
+ case EIOINTC_VIRT_CONFIG:
+ *data = eiointc->status;
+ break;
+ default:
+ break;
+ }
+ spin_unlock_irqrestore(&eiointc->lock, flags);
+
+ return 0;
+}
+
+static int kvm_eiointc_virt_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ int ret = 0;
+ unsigned long flags;
+ u32 value = *(u32 *)val;
+ struct loongarch_eiointc *eiointc = vcpu->kvm->arch.eiointc;
+
+ if (!eiointc) {
+ kvm_err("%s: eiointc irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+
+ addr -= EIOINTC_VIRT_BASE;
+ spin_lock_irqsave(&eiointc->lock, flags);
+ switch (addr) {
+ case EIOINTC_VIRT_FEATURES:
+ ret = -EPERM;
+ break;
+ case EIOINTC_VIRT_CONFIG:
+ /*
+ * eiointc features can only be set at disabled status
+ */
+ if ((eiointc->status & BIT(EIOINTC_ENABLE)) && value) {
+ ret = -EPERM;
+ break;
+ }
+ eiointc->status = value & eiointc->features;
+ break;
+ default:
+ break;
+ }
+ spin_unlock_irqrestore(&eiointc->lock, flags);
+
+ return ret;
+}
+
+static const struct kvm_io_device_ops kvm_eiointc_virt_ops = {
+ .read = kvm_eiointc_virt_read,
+ .write = kvm_eiointc_virt_write,
+};
+
+static int kvm_eiointc_ctrl_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ int ret = 0;
+ unsigned long flags;
+ unsigned long type = (unsigned long)attr->attr;
+ u32 i, start_irq;
+ void __user *data;
+ struct loongarch_eiointc *s = dev->kvm->arch.eiointc;
+
+ data = (void __user *)attr->addr;
+ spin_lock_irqsave(&s->lock, flags);
+ switch (type) {
+ case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU:
+ if (copy_from_user(&s->num_cpu, data, 4))
+ ret = -EFAULT;
+ break;
+ case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_FEATURE:
+ if (copy_from_user(&s->features, data, 4))
+ ret = -EFAULT;
+ if (!(s->features & BIT(EIOINTC_HAS_VIRT_EXTENSION)))
+ s->status |= BIT(EIOINTC_ENABLE);
+ break;
+ case KVM_DEV_LOONGARCH_EXTIOI_CTRL_LOAD_FINISHED:
+ eiointc_set_sw_coreisr(s);
+ for (i = 0; i < (EIOINTC_IRQS / 4); i++) {
+ start_irq = i * 4;
+ eiointc_update_sw_coremap(s, start_irq,
+ (void *)&s->coremap.reg_u32[i], sizeof(u32), false);
+ }
+ break;
+ default:
+ break;
+ }
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ return ret;
+}
+
+static int kvm_eiointc_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ bool is_write)
+{
+ int addr, cpuid, offset, ret = 0;
+ unsigned long flags;
+ void *p = NULL;
+ void __user *data;
+ struct loongarch_eiointc *s;
+
+ s = dev->kvm->arch.eiointc;
+ addr = attr->attr;
+ cpuid = addr >> 16;
+ addr &= 0xffff;
+ data = (void __user *)attr->addr;
+ switch (addr) {
+ case EIOINTC_NODETYPE_START ... EIOINTC_NODETYPE_END:
+ offset = (addr - EIOINTC_NODETYPE_START) / 4;
+ p = &s->nodetype.reg_u32[offset];
+ break;
+ case EIOINTC_IPMAP_START ... EIOINTC_IPMAP_END:
+ offset = (addr - EIOINTC_IPMAP_START) / 4;
+ p = &s->ipmap.reg_u32[offset];
+ break;
+ case EIOINTC_ENABLE_START ... EIOINTC_ENABLE_END:
+ offset = (addr - EIOINTC_ENABLE_START) / 4;
+ p = &s->enable.reg_u32[offset];
+ break;
+ case EIOINTC_BOUNCE_START ... EIOINTC_BOUNCE_END:
+ offset = (addr - EIOINTC_BOUNCE_START) / 4;
+ p = &s->bounce.reg_u32[offset];
+ break;
+ case EIOINTC_ISR_START ... EIOINTC_ISR_END:
+ offset = (addr - EIOINTC_ISR_START) / 4;
+ p = &s->isr.reg_u32[offset];
+ break;
+ case EIOINTC_COREISR_START ... EIOINTC_COREISR_END:
+ offset = (addr - EIOINTC_COREISR_START) / 4;
+ p = &s->coreisr.reg_u32[cpuid][offset];
+ break;
+ case EIOINTC_COREMAP_START ... EIOINTC_COREMAP_END:
+ offset = (addr - EIOINTC_COREMAP_START) / 4;
+ p = &s->coremap.reg_u32[offset];
+ break;
+ default:
+ kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&s->lock, flags);
+ if (is_write) {
+ if (copy_from_user(p, data, 4))
+ ret = -EFAULT;
+ } else {
+ if (copy_to_user(data, p, 4))
+ ret = -EFAULT;
+ }
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ return ret;
+}
+
+static int kvm_eiointc_sw_status_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ bool is_write)
+{
+ int addr, ret = 0;
+ unsigned long flags;
+ void *p = NULL;
+ void __user *data;
+ struct loongarch_eiointc *s;
+
+ s = dev->kvm->arch.eiointc;
+ addr = attr->attr;
+ addr &= 0xffff;
+
+ data = (void __user *)attr->addr;
+ switch (addr) {
+ case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_NUM_CPU:
+ p = &s->num_cpu;
+ break;
+ case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_FEATURE:
+ p = &s->features;
+ break;
+ case KVM_DEV_LOONGARCH_EXTIOI_SW_STATUS_STATE:
+ p = &s->status;
+ break;
+ default:
+ kvm_err("%s: unknown eiointc register, addr = %d\n", __func__, addr);
+ return -EINVAL;
+ }
+ spin_lock_irqsave(&s->lock, flags);
+ if (is_write) {
+ if (copy_from_user(p, data, 4))
+ ret = -EFAULT;
+ } else {
+ if (copy_to_user(data, p, 4))
+ ret = -EFAULT;
+ }
+ spin_unlock_irqrestore(&s->lock, flags);
+
+ return ret;
+}
+
+static int kvm_eiointc_get_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS:
+ return kvm_eiointc_regs_access(dev, attr, false);
+ case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS:
+ return kvm_eiointc_sw_status_access(dev, attr, false);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvm_eiointc_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_LOONGARCH_EXTIOI_GRP_CTRL:
+ return kvm_eiointc_ctrl_access(dev, attr);
+ case KVM_DEV_LOONGARCH_EXTIOI_GRP_REGS:
+ return kvm_eiointc_regs_access(dev, attr, true);
+ case KVM_DEV_LOONGARCH_EXTIOI_GRP_SW_STATUS:
+ return kvm_eiointc_sw_status_access(dev, attr, true);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvm_eiointc_create(struct kvm_device *dev, u32 type)
+{
+ int ret;
+ struct loongarch_eiointc *s;
+ struct kvm_io_device *device, *device1;
+ struct kvm *kvm = dev->kvm;
+
+ /* eiointc has been created */
+ if (kvm->arch.eiointc)
+ return -EINVAL;
+
+ s = kzalloc(sizeof(struct loongarch_eiointc), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ spin_lock_init(&s->lock);
+ s->kvm = kvm;
+
+ /*
+ * Initialize IOCSR device
+ */
+ device = &s->device;
+ kvm_iodevice_init(device, &kvm_eiointc_ops);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS,
+ EIOINTC_BASE, EIOINTC_SIZE, device);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret < 0) {
+ kfree(s);
+ return ret;
+ }
+
+ device1 = &s->device_vext;
+ kvm_iodevice_init(device1, &kvm_eiointc_virt_ops);
+ ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS,
+ EIOINTC_VIRT_BASE, EIOINTC_VIRT_SIZE, device1);
+ if (ret < 0) {
+ kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &s->device);
+ kfree(s);
+ return ret;
+ }
+ kvm->arch.eiointc = s;
+
+ return 0;
+}
+
+static void kvm_eiointc_destroy(struct kvm_device *dev)
+{
+ struct kvm *kvm;
+ struct loongarch_eiointc *eiointc;
+
+ if (!dev || !dev->kvm || !dev->kvm->arch.eiointc)
+ return;
+
+ kvm = dev->kvm;
+ eiointc = kvm->arch.eiointc;
+ kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &eiointc->device);
+ kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &eiointc->device_vext);
+ kfree(eiointc);
+}
+
+static struct kvm_device_ops kvm_eiointc_dev_ops = {
+ .name = "kvm-loongarch-eiointc",
+ .create = kvm_eiointc_create,
+ .destroy = kvm_eiointc_destroy,
+ .set_attr = kvm_eiointc_set_attr,
+ .get_attr = kvm_eiointc_get_attr,
+};
+
+int kvm_loongarch_register_eiointc_device(void)
+{
+ return kvm_register_device_ops(&kvm_eiointc_dev_ops, KVM_DEV_TYPE_LOONGARCH_EIOINTC);
+}
diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c
new file mode 100644
index 000000000000..fe734dc062ed
--- /dev/null
+++ b/arch/loongarch/kvm/intc/ipi.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_ipi.h>
+#include <asm/kvm_vcpu.h>
+
+static void ipi_send(struct kvm *kvm, uint64_t data)
+{
+ int cpu, action;
+ uint32_t status;
+ struct kvm_vcpu *vcpu;
+ struct kvm_interrupt irq;
+
+ cpu = ((data & 0xffffffff) >> 16) & 0x3ff;
+ vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu);
+ if (unlikely(vcpu == NULL)) {
+ kvm_err("%s: invalid target cpu: %d\n", __func__, cpu);
+ return;
+ }
+
+ action = BIT(data & 0x1f);
+ spin_lock(&vcpu->arch.ipi_state.lock);
+ status = vcpu->arch.ipi_state.status;
+ vcpu->arch.ipi_state.status |= action;
+ spin_unlock(&vcpu->arch.ipi_state.lock);
+ if (status == 0) {
+ irq.irq = LARCH_INT_IPI;
+ kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ }
+}
+
+static void ipi_clear(struct kvm_vcpu *vcpu, uint64_t data)
+{
+ uint32_t status;
+ struct kvm_interrupt irq;
+
+ spin_lock(&vcpu->arch.ipi_state.lock);
+ vcpu->arch.ipi_state.status &= ~data;
+ status = vcpu->arch.ipi_state.status;
+ spin_unlock(&vcpu->arch.ipi_state.lock);
+ if (status == 0) {
+ irq.irq = -LARCH_INT_IPI;
+ kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ }
+}
+
+static uint64_t read_mailbox(struct kvm_vcpu *vcpu, int offset, int len)
+{
+ uint64_t data = 0;
+
+ spin_lock(&vcpu->arch.ipi_state.lock);
+ data = *(ulong *)((void *)vcpu->arch.ipi_state.buf + (offset - 0x20));
+ spin_unlock(&vcpu->arch.ipi_state.lock);
+
+ switch (len) {
+ case 1:
+ return data & 0xff;
+ case 2:
+ return data & 0xffff;
+ case 4:
+ return data & 0xffffffff;
+ case 8:
+ return data;
+ default:
+ kvm_err("%s: unknown data len: %d\n", __func__, len);
+ return 0;
+ }
+}
+
+static void write_mailbox(struct kvm_vcpu *vcpu, int offset, uint64_t data, int len)
+{
+ void *pbuf;
+
+ spin_lock(&vcpu->arch.ipi_state.lock);
+ pbuf = (void *)vcpu->arch.ipi_state.buf + (offset - 0x20);
+
+ switch (len) {
+ case 1:
+ *(unsigned char *)pbuf = (unsigned char)data;
+ break;
+ case 2:
+ *(unsigned short *)pbuf = (unsigned short)data;
+ break;
+ case 4:
+ *(unsigned int *)pbuf = (unsigned int)data;
+ break;
+ case 8:
+ *(unsigned long *)pbuf = (unsigned long)data;
+ break;
+ default:
+ kvm_err("%s: unknown data len: %d\n", __func__, len);
+ }
+ spin_unlock(&vcpu->arch.ipi_state.lock);
+}
+
+static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data)
+{
+ int i, idx, ret;
+ uint32_t val = 0, mask = 0;
+
+ /*
+ * Bit 27-30 is mask for byte writing.
+ * If the mask is 0, we need not to do anything.
+ */
+ if ((data >> 27) & 0xf) {
+ /* Read the old val */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (unlikely(ret)) {
+ kvm_err("%s: : read data from addr %llx failed\n", __func__, addr);
+ return ret;
+ }
+ /* Construct the mask by scanning the bit 27-30 */
+ for (i = 0; i < 4; i++) {
+ if (data & (BIT(27 + i)))
+ mask |= (0xff << (i * 8));
+ }
+ /* Save the old part of val */
+ val &= mask;
+ }
+ val |= ((uint32_t)(data >> 32) & ~mask);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (unlikely(ret))
+ kvm_err("%s: : write data to addr %llx failed\n", __func__, addr);
+
+ return ret;
+}
+
+static int mail_send(struct kvm *kvm, uint64_t data)
+{
+ int cpu, mailbox, offset;
+ struct kvm_vcpu *vcpu;
+
+ cpu = ((data & 0xffffffff) >> 16) & 0x3ff;
+ vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu);
+ if (unlikely(vcpu == NULL)) {
+ kvm_err("%s: invalid target cpu: %d\n", __func__, cpu);
+ return -EINVAL;
+ }
+ mailbox = ((data & 0xffffffff) >> 2) & 0x7;
+ offset = IOCSR_IPI_BASE + IOCSR_IPI_BUF_20 + mailbox * 4;
+
+ return send_ipi_data(vcpu, offset, data);
+}
+
+static int any_send(struct kvm *kvm, uint64_t data)
+{
+ int cpu, offset;
+ struct kvm_vcpu *vcpu;
+
+ cpu = ((data & 0xffffffff) >> 16) & 0x3ff;
+ vcpu = kvm_get_vcpu_by_cpuid(kvm, cpu);
+ if (unlikely(vcpu == NULL)) {
+ kvm_err("%s: invalid target cpu: %d\n", __func__, cpu);
+ return -EINVAL;
+ }
+ offset = data & 0xffff;
+
+ return send_ipi_data(vcpu, offset, data);
+}
+
+static int loongarch_ipi_readl(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *val)
+{
+ int ret = 0;
+ uint32_t offset;
+ uint64_t res = 0;
+
+ offset = (uint32_t)(addr & 0x1ff);
+ WARN_ON_ONCE(offset & (len - 1));
+
+ switch (offset) {
+ case IOCSR_IPI_STATUS:
+ spin_lock(&vcpu->arch.ipi_state.lock);
+ res = vcpu->arch.ipi_state.status;
+ spin_unlock(&vcpu->arch.ipi_state.lock);
+ break;
+ case IOCSR_IPI_EN:
+ spin_lock(&vcpu->arch.ipi_state.lock);
+ res = vcpu->arch.ipi_state.en;
+ spin_unlock(&vcpu->arch.ipi_state.lock);
+ break;
+ case IOCSR_IPI_SET:
+ res = 0;
+ break;
+ case IOCSR_IPI_CLEAR:
+ res = 0;
+ break;
+ case IOCSR_IPI_BUF_20 ... IOCSR_IPI_BUF_38 + 7:
+ if (offset + len > IOCSR_IPI_BUF_38 + 8) {
+ kvm_err("%s: invalid offset or len: offset = %d, len = %d\n",
+ __func__, offset, len);
+ ret = -EINVAL;
+ break;
+ }
+ res = read_mailbox(vcpu, offset, len);
+ break;
+ default:
+ kvm_err("%s: unknown addr: %llx\n", __func__, addr);
+ ret = -EINVAL;
+ break;
+ }
+ *(uint64_t *)val = res;
+
+ return ret;
+}
+
+static int loongarch_ipi_writel(struct kvm_vcpu *vcpu, gpa_t addr, int len, const void *val)
+{
+ int ret = 0;
+ uint64_t data;
+ uint32_t offset;
+
+ data = *(uint64_t *)val;
+
+ offset = (uint32_t)(addr & 0x1ff);
+ WARN_ON_ONCE(offset & (len - 1));
+
+ switch (offset) {
+ case IOCSR_IPI_STATUS:
+ ret = -EINVAL;
+ break;
+ case IOCSR_IPI_EN:
+ spin_lock(&vcpu->arch.ipi_state.lock);
+ vcpu->arch.ipi_state.en = data;
+ spin_unlock(&vcpu->arch.ipi_state.lock);
+ break;
+ case IOCSR_IPI_SET:
+ ret = -EINVAL;
+ break;
+ case IOCSR_IPI_CLEAR:
+ /* Just clear the status of the current vcpu */
+ ipi_clear(vcpu, data);
+ break;
+ case IOCSR_IPI_BUF_20 ... IOCSR_IPI_BUF_38 + 7:
+ if (offset + len > IOCSR_IPI_BUF_38 + 8) {
+ kvm_err("%s: invalid offset or len: offset = %d, len = %d\n",
+ __func__, offset, len);
+ ret = -EINVAL;
+ break;
+ }
+ write_mailbox(vcpu, offset, data, len);
+ break;
+ case IOCSR_IPI_SEND:
+ ipi_send(vcpu->kvm, data);
+ break;
+ case IOCSR_MAIL_SEND:
+ ret = mail_send(vcpu->kvm, *(uint64_t *)val);
+ break;
+ case IOCSR_ANY_SEND:
+ ret = any_send(vcpu->kvm, *(uint64_t *)val);
+ break;
+ default:
+ kvm_err("%s: unknown addr: %llx\n", __func__, addr);
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_ipi_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ int ret;
+ struct loongarch_ipi *ipi;
+
+ ipi = vcpu->kvm->arch.ipi;
+ if (!ipi) {
+ kvm_err("%s: ipi irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+ ipi->kvm->stat.ipi_read_exits++;
+ ret = loongarch_ipi_readl(vcpu, addr, len, val);
+
+ return ret;
+}
+
+static int kvm_ipi_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ int ret;
+ struct loongarch_ipi *ipi;
+
+ ipi = vcpu->kvm->arch.ipi;
+ if (!ipi) {
+ kvm_err("%s: ipi irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+ ipi->kvm->stat.ipi_write_exits++;
+ ret = loongarch_ipi_writel(vcpu, addr, len, val);
+
+ return ret;
+}
+
+static const struct kvm_io_device_ops kvm_ipi_ops = {
+ .read = kvm_ipi_read,
+ .write = kvm_ipi_write,
+};
+
+static int kvm_ipi_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ bool is_write)
+{
+ int len = 4;
+ int cpu, addr;
+ uint64_t val;
+ void *p = NULL;
+ struct kvm_vcpu *vcpu;
+
+ cpu = (attr->attr >> 16) & 0x3ff;
+ addr = attr->attr & 0xff;
+
+ vcpu = kvm_get_vcpu(dev->kvm, cpu);
+ if (unlikely(vcpu == NULL)) {
+ kvm_err("%s: invalid target cpu: %d\n", __func__, cpu);
+ return -EINVAL;
+ }
+
+ switch (addr) {
+ case IOCSR_IPI_STATUS:
+ p = &vcpu->arch.ipi_state.status;
+ break;
+ case IOCSR_IPI_EN:
+ p = &vcpu->arch.ipi_state.en;
+ break;
+ case IOCSR_IPI_SET:
+ p = &vcpu->arch.ipi_state.set;
+ break;
+ case IOCSR_IPI_CLEAR:
+ p = &vcpu->arch.ipi_state.clear;
+ break;
+ case IOCSR_IPI_BUF_20:
+ p = &vcpu->arch.ipi_state.buf[0];
+ len = 8;
+ break;
+ case IOCSR_IPI_BUF_28:
+ p = &vcpu->arch.ipi_state.buf[1];
+ len = 8;
+ break;
+ case IOCSR_IPI_BUF_30:
+ p = &vcpu->arch.ipi_state.buf[2];
+ len = 8;
+ break;
+ case IOCSR_IPI_BUF_38:
+ p = &vcpu->arch.ipi_state.buf[3];
+ len = 8;
+ break;
+ default:
+ kvm_err("%s: unknown ipi register, addr = %d\n", __func__, addr);
+ return -EINVAL;
+ }
+
+ if (is_write) {
+ if (len == 4) {
+ if (get_user(val, (uint32_t __user *)attr->addr))
+ return -EFAULT;
+ *(uint32_t *)p = (uint32_t)val;
+ } else if (len == 8) {
+ if (get_user(val, (uint64_t __user *)attr->addr))
+ return -EFAULT;
+ *(uint64_t *)p = val;
+ }
+ } else {
+ if (len == 4) {
+ val = *(uint32_t *)p;
+ return put_user(val, (uint32_t __user *)attr->addr);
+ } else if (len == 8) {
+ val = *(uint64_t *)p;
+ return put_user(val, (uint64_t __user *)attr->addr);
+ }
+ }
+
+ return 0;
+}
+
+static int kvm_ipi_get_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_LOONGARCH_IPI_GRP_REGS:
+ return kvm_ipi_regs_access(dev, attr, false);
+ default:
+ kvm_err("%s: unknown group (%d)\n", __func__, attr->group);
+ return -EINVAL;
+ }
+}
+
+static int kvm_ipi_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_LOONGARCH_IPI_GRP_REGS:
+ return kvm_ipi_regs_access(dev, attr, true);
+ default:
+ kvm_err("%s: unknown group (%d)\n", __func__, attr->group);
+ return -EINVAL;
+ }
+}
+
+static int kvm_ipi_create(struct kvm_device *dev, u32 type)
+{
+ int ret;
+ struct kvm *kvm;
+ struct kvm_io_device *device;
+ struct loongarch_ipi *s;
+
+ if (!dev) {
+ kvm_err("%s: kvm_device ptr is invalid!\n", __func__);
+ return -EINVAL;
+ }
+
+ kvm = dev->kvm;
+ if (kvm->arch.ipi) {
+ kvm_err("%s: LoongArch IPI has already been created!\n", __func__);
+ return -EINVAL;
+ }
+
+ s = kzalloc(sizeof(struct loongarch_ipi), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ spin_lock_init(&s->lock);
+ s->kvm = kvm;
+
+ /*
+ * Initialize IOCSR device
+ */
+ device = &s->device;
+ kvm_iodevice_init(device, &kvm_ipi_ops);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_IOCSR_BUS, IOCSR_IPI_BASE, IOCSR_IPI_SIZE, device);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret < 0) {
+ kvm_err("%s: Initialize IOCSR dev failed, ret = %d\n", __func__, ret);
+ goto err;
+ }
+
+ kvm->arch.ipi = s;
+ return 0;
+
+err:
+ kfree(s);
+ return -EFAULT;
+}
+
+static void kvm_ipi_destroy(struct kvm_device *dev)
+{
+ struct kvm *kvm;
+ struct loongarch_ipi *ipi;
+
+ if (!dev || !dev->kvm || !dev->kvm->arch.ipi)
+ return;
+
+ kvm = dev->kvm;
+ ipi = kvm->arch.ipi;
+ kvm_io_bus_unregister_dev(kvm, KVM_IOCSR_BUS, &ipi->device);
+ kfree(ipi);
+}
+
+static struct kvm_device_ops kvm_ipi_dev_ops = {
+ .name = "kvm-loongarch-ipi",
+ .create = kvm_ipi_create,
+ .destroy = kvm_ipi_destroy,
+ .set_attr = kvm_ipi_set_attr,
+ .get_attr = kvm_ipi_get_attr,
+};
+
+int kvm_loongarch_register_ipi_device(void)
+{
+ return kvm_register_device_ops(&kvm_ipi_dev_ops, KVM_DEV_TYPE_LOONGARCH_IPI);
+}
diff --git a/arch/loongarch/kvm/intc/pch_pic.c b/arch/loongarch/kvm/intc/pch_pic.c
new file mode 100644
index 000000000000..08fce845f668
--- /dev/null
+++ b/arch/loongarch/kvm/intc/pch_pic.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ */
+
+#include <asm/kvm_eiointc.h>
+#include <asm/kvm_pch_pic.h>
+#include <asm/kvm_vcpu.h>
+#include <linux/count_zeros.h>
+
+/* update the isr according to irq level and route irq to eiointc */
+static void pch_pic_update_irq(struct loongarch_pch_pic *s, int irq, int level)
+{
+ u64 mask = BIT(irq);
+
+ /*
+ * set isr and route irq to eiointc and
+ * the route table is in htmsi_vector[]
+ */
+ if (level) {
+ if (mask & s->irr & ~s->mask) {
+ s->isr |= mask;
+ irq = s->htmsi_vector[irq];
+ eiointc_set_irq(s->kvm->arch.eiointc, irq, level);
+ }
+ } else {
+ if (mask & s->isr & ~s->irr) {
+ s->isr &= ~mask;
+ irq = s->htmsi_vector[irq];
+ eiointc_set_irq(s->kvm->arch.eiointc, irq, level);
+ }
+ }
+}
+
+/* update batch irqs, the irq_mask is a bitmap of irqs */
+static void pch_pic_update_batch_irqs(struct loongarch_pch_pic *s, u64 irq_mask, int level)
+{
+ int irq, bits;
+
+ /* find each irq by irqs bitmap and update each irq */
+ bits = sizeof(irq_mask) * 8;
+ irq = find_first_bit((void *)&irq_mask, bits);
+ while (irq < bits) {
+ pch_pic_update_irq(s, irq, level);
+ bitmap_clear((void *)&irq_mask, irq, 1);
+ irq = find_first_bit((void *)&irq_mask, bits);
+ }
+}
+
+/* called when a irq is triggered in pch pic */
+void pch_pic_set_irq(struct loongarch_pch_pic *s, int irq, int level)
+{
+ u64 mask = BIT(irq);
+
+ spin_lock(&s->lock);
+ if (level)
+ s->irr |= mask; /* set irr */
+ else {
+ /*
+ * In edge triggered mode, 0 does not mean to clear irq
+ * The irr register variable is cleared when cpu writes to the
+ * PCH_PIC_CLEAR_START address area
+ */
+ if (s->edge & mask) {
+ spin_unlock(&s->lock);
+ return;
+ }
+ s->irr &= ~mask;
+ }
+ pch_pic_update_irq(s, irq, level);
+ spin_unlock(&s->lock);
+}
+
+/* msi irq handler */
+void pch_msi_set_irq(struct kvm *kvm, int irq, int level)
+{
+ eiointc_set_irq(kvm->arch.eiointc, irq, level);
+}
+
+/*
+ * pch pic register is 64-bit, but it is accessed by 32-bit,
+ * so we use high to get whether low or high 32 bits we want
+ * to read.
+ */
+static u32 pch_pic_read_reg(u64 *s, int high)
+{
+ u64 val = *s;
+
+ /* read the high 32 bits when high is 1 */
+ return high ? (u32)(val >> 32) : (u32)val;
+}
+
+/*
+ * pch pic register is 64-bit, but it is accessed by 32-bit,
+ * so we use high to get whether low or high 32 bits we want
+ * to write.
+ */
+static u32 pch_pic_write_reg(u64 *s, int high, u32 v)
+{
+ u64 val = *s, data = v;
+
+ if (high) {
+ /*
+ * Clear val high 32 bits
+ * Write the high 32 bits when the high is 1
+ */
+ *s = (val << 32 >> 32) | (data << 32);
+ val >>= 32;
+ } else
+ /*
+ * Clear val low 32 bits
+ * Write the low 32 bits when the high is 0
+ */
+ *s = (val >> 32 << 32) | v;
+
+ return (u32)val;
+}
+
+static int loongarch_pch_pic_read(struct loongarch_pch_pic *s, gpa_t addr, int len, void *val)
+{
+ int offset, index, ret = 0;
+ u32 data = 0;
+ u64 int_id = 0;
+
+ offset = addr - s->pch_pic_base;
+
+ spin_lock(&s->lock);
+ switch (offset) {
+ case PCH_PIC_INT_ID_START ... PCH_PIC_INT_ID_END:
+ /* int id version */
+ int_id |= (u64)PCH_PIC_INT_ID_VER << 32;
+ /* irq number */
+ int_id |= (u64)31 << (32 + 16);
+ /* int id value */
+ int_id |= PCH_PIC_INT_ID_VAL;
+ *(u64 *)val = int_id;
+ break;
+ case PCH_PIC_MASK_START ... PCH_PIC_MASK_END:
+ offset -= PCH_PIC_MASK_START;
+ index = offset >> 2;
+ /* read mask reg */
+ data = pch_pic_read_reg(&s->mask, index);
+ *(u32 *)val = data;
+ break;
+ case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END:
+ offset -= PCH_PIC_HTMSI_EN_START;
+ index = offset >> 2;
+ /* read htmsi enable reg */
+ data = pch_pic_read_reg(&s->htmsi_en, index);
+ *(u32 *)val = data;
+ break;
+ case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END:
+ offset -= PCH_PIC_EDGE_START;
+ index = offset >> 2;
+ /* read edge enable reg */
+ data = pch_pic_read_reg(&s->edge, index);
+ *(u32 *)val = data;
+ break;
+ case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END:
+ case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END:
+ /* we only use default mode: fixed interrupt distribution mode */
+ *(u32 *)val = 0;
+ break;
+ case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END:
+ /* only route to int0: eiointc */
+ *(u8 *)val = 1;
+ break;
+ case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END:
+ offset -= PCH_PIC_HTMSI_VEC_START;
+ /* read htmsi vector */
+ data = s->htmsi_vector[offset];
+ *(u8 *)val = data;
+ break;
+ case PCH_PIC_POLARITY_START ... PCH_PIC_POLARITY_END:
+ /* we only use defalut value 0: high level triggered */
+ *(u32 *)val = 0;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ spin_unlock(&s->lock);
+
+ return ret;
+}
+
+static int kvm_pch_pic_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ int ret;
+ struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic;
+
+ if (!s) {
+ kvm_err("%s: pch pic irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+
+ /* statistics of pch pic reading */
+ vcpu->kvm->stat.pch_pic_read_exits++;
+ ret = loongarch_pch_pic_read(s, addr, len, val);
+
+ return ret;
+}
+
+static int loongarch_pch_pic_write(struct loongarch_pch_pic *s, gpa_t addr,
+ int len, const void *val)
+{
+ int ret;
+ u32 old, data, offset, index;
+ u64 irq;
+
+ ret = 0;
+ data = *(u32 *)val;
+ offset = addr - s->pch_pic_base;
+
+ spin_lock(&s->lock);
+ switch (offset) {
+ case PCH_PIC_MASK_START ... PCH_PIC_MASK_END:
+ offset -= PCH_PIC_MASK_START;
+ /* get whether high or low 32 bits we want to write */
+ index = offset >> 2;
+ old = pch_pic_write_reg(&s->mask, index, data);
+ /* enable irq when mask value change to 0 */
+ irq = (old & ~data) << (32 * index);
+ pch_pic_update_batch_irqs(s, irq, 1);
+ /* disable irq when mask value change to 1 */
+ irq = (~old & data) << (32 * index);
+ pch_pic_update_batch_irqs(s, irq, 0);
+ break;
+ case PCH_PIC_HTMSI_EN_START ... PCH_PIC_HTMSI_EN_END:
+ offset -= PCH_PIC_HTMSI_EN_START;
+ index = offset >> 2;
+ pch_pic_write_reg(&s->htmsi_en, index, data);
+ break;
+ case PCH_PIC_EDGE_START ... PCH_PIC_EDGE_END:
+ offset -= PCH_PIC_EDGE_START;
+ index = offset >> 2;
+ /* 1: edge triggered, 0: level triggered */
+ pch_pic_write_reg(&s->edge, index, data);
+ break;
+ case PCH_PIC_CLEAR_START ... PCH_PIC_CLEAR_END:
+ offset -= PCH_PIC_CLEAR_START;
+ index = offset >> 2;
+ /* write 1 to clear edge irq */
+ old = pch_pic_read_reg(&s->irr, index);
+ /*
+ * get the irq bitmap which is edge triggered and
+ * already set and to be cleared
+ */
+ irq = old & pch_pic_read_reg(&s->edge, index) & data;
+ /* write irr to the new state where irqs have been cleared */
+ pch_pic_write_reg(&s->irr, index, old & ~irq);
+ /* update cleared irqs */
+ pch_pic_update_batch_irqs(s, irq, 0);
+ break;
+ case PCH_PIC_AUTO_CTRL0_START ... PCH_PIC_AUTO_CTRL0_END:
+ offset -= PCH_PIC_AUTO_CTRL0_START;
+ index = offset >> 2;
+ /* we only use default mode: fixed interrupt distribution mode */
+ pch_pic_write_reg(&s->auto_ctrl0, index, 0);
+ break;
+ case PCH_PIC_AUTO_CTRL1_START ... PCH_PIC_AUTO_CTRL1_END:
+ offset -= PCH_PIC_AUTO_CTRL1_START;
+ index = offset >> 2;
+ /* we only use default mode: fixed interrupt distribution mode */
+ pch_pic_write_reg(&s->auto_ctrl1, index, 0);
+ break;
+ case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END:
+ offset -= PCH_PIC_ROUTE_ENTRY_START;
+ /* only route to int0: eiointc */
+ s->route_entry[offset] = 1;
+ break;
+ case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END:
+ /* route table to eiointc */
+ offset -= PCH_PIC_HTMSI_VEC_START;
+ s->htmsi_vector[offset] = (u8)data;
+ break;
+ case PCH_PIC_POLARITY_START ... PCH_PIC_POLARITY_END:
+ offset -= PCH_PIC_POLARITY_START;
+ index = offset >> 2;
+ /* we only use defalut value 0: high level triggered */
+ pch_pic_write_reg(&s->polarity, index, 0);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ spin_unlock(&s->lock);
+
+ return ret;
+}
+
+static int kvm_pch_pic_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ int ret;
+ struct loongarch_pch_pic *s = vcpu->kvm->arch.pch_pic;
+
+ if (!s) {
+ kvm_err("%s: pch pic irqchip not valid!\n", __func__);
+ return -EINVAL;
+ }
+
+ /* statistics of pch pic writing */
+ vcpu->kvm->stat.pch_pic_write_exits++;
+ ret = loongarch_pch_pic_write(s, addr, len, val);
+
+ return ret;
+}
+
+static const struct kvm_io_device_ops kvm_pch_pic_ops = {
+ .read = kvm_pch_pic_read,
+ .write = kvm_pch_pic_write,
+};
+
+static int kvm_pch_pic_init(struct kvm_device *dev, u64 addr)
+{
+ int ret;
+ struct kvm *kvm = dev->kvm;
+ struct kvm_io_device *device;
+ struct loongarch_pch_pic *s = dev->kvm->arch.pch_pic;
+
+ s->pch_pic_base = addr;
+ device = &s->device;
+ /* init device by pch pic writing and reading ops */
+ kvm_iodevice_init(device, &kvm_pch_pic_ops);
+ mutex_lock(&kvm->slots_lock);
+ /* register pch pic device */
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, addr, PCH_PIC_SIZE, device);
+ mutex_unlock(&kvm->slots_lock);
+
+ return (ret < 0) ? -EFAULT : 0;
+}
+
+/* used by user space to get or set pch pic registers */
+static int kvm_pch_pic_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ bool is_write)
+{
+ int addr, offset, len = 8, ret = 0;
+ void __user *data;
+ void *p = NULL;
+ struct loongarch_pch_pic *s;
+
+ s = dev->kvm->arch.pch_pic;
+ addr = attr->attr;
+ data = (void __user *)attr->addr;
+
+ /* get pointer to pch pic register by addr */
+ switch (addr) {
+ case PCH_PIC_MASK_START:
+ p = &s->mask;
+ break;
+ case PCH_PIC_HTMSI_EN_START:
+ p = &s->htmsi_en;
+ break;
+ case PCH_PIC_EDGE_START:
+ p = &s->edge;
+ break;
+ case PCH_PIC_AUTO_CTRL0_START:
+ p = &s->auto_ctrl0;
+ break;
+ case PCH_PIC_AUTO_CTRL1_START:
+ p = &s->auto_ctrl1;
+ break;
+ case PCH_PIC_ROUTE_ENTRY_START ... PCH_PIC_ROUTE_ENTRY_END:
+ offset = addr - PCH_PIC_ROUTE_ENTRY_START;
+ p = &s->route_entry[offset];
+ len = 1;
+ break;
+ case PCH_PIC_HTMSI_VEC_START ... PCH_PIC_HTMSI_VEC_END:
+ offset = addr - PCH_PIC_HTMSI_VEC_START;
+ p = &s->htmsi_vector[offset];
+ len = 1;
+ break;
+ case PCH_PIC_INT_IRR_START:
+ p = &s->irr;
+ break;
+ case PCH_PIC_INT_ISR_START:
+ p = &s->isr;
+ break;
+ case PCH_PIC_POLARITY_START:
+ p = &s->polarity;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ spin_lock(&s->lock);
+ /* write or read value according to is_write */
+ if (is_write) {
+ if (copy_from_user(p, data, len))
+ ret = -EFAULT;
+ } else {
+ if (copy_to_user(data, p, len))
+ ret = -EFAULT;
+ }
+ spin_unlock(&s->lock);
+
+ return ret;
+}
+
+static int kvm_pch_pic_get_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS:
+ return kvm_pch_pic_regs_access(dev, attr, false);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvm_pch_pic_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ u64 addr;
+ void __user *uaddr = (void __user *)(long)attr->addr;
+
+ switch (attr->group) {
+ case KVM_DEV_LOONGARCH_PCH_PIC_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_LOONGARCH_PCH_PIC_CTRL_INIT:
+ if (copy_from_user(&addr, uaddr, sizeof(addr)))
+ return -EFAULT;
+
+ if (!dev->kvm->arch.pch_pic) {
+ kvm_err("%s: please create pch_pic irqchip first!\n", __func__);
+ return -ENODEV;
+ }
+
+ return kvm_pch_pic_init(dev, addr);
+ default:
+ kvm_err("%s: unknown group (%d) attr (%lld)\n", __func__, attr->group,
+ attr->attr);
+ return -EINVAL;
+ }
+ case KVM_DEV_LOONGARCH_PCH_PIC_GRP_REGS:
+ return kvm_pch_pic_regs_access(dev, attr, true);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int kvm_setup_default_irq_routing(struct kvm *kvm)
+{
+ int i, ret;
+ u32 nr = KVM_IRQCHIP_NUM_PINS;
+ struct kvm_irq_routing_entry *entries;
+
+ entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ for (i = 0; i < nr; i++) {
+ entries[i].gsi = i;
+ entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ entries[i].u.irqchip.irqchip = 0;
+ entries[i].u.irqchip.pin = i;
+ }
+ ret = kvm_set_irq_routing(kvm, entries, nr, 0);
+ kfree(entries);
+
+ return ret;
+}
+
+static int kvm_pch_pic_create(struct kvm_device *dev, u32 type)
+{
+ int ret;
+ struct kvm *kvm = dev->kvm;
+ struct loongarch_pch_pic *s;
+
+ /* pch pic should not has been created */
+ if (kvm->arch.pch_pic)
+ return -EINVAL;
+
+ ret = kvm_setup_default_irq_routing(kvm);
+ if (ret)
+ return -ENOMEM;
+
+ s = kzalloc(sizeof(struct loongarch_pch_pic), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ spin_lock_init(&s->lock);
+ s->kvm = kvm;
+ kvm->arch.pch_pic = s;
+
+ return 0;
+}
+
+static void kvm_pch_pic_destroy(struct kvm_device *dev)
+{
+ struct kvm *kvm;
+ struct loongarch_pch_pic *s;
+
+ if (!dev || !dev->kvm || !dev->kvm->arch.pch_pic)
+ return;
+
+ kvm = dev->kvm;
+ s = kvm->arch.pch_pic;
+ /* unregister pch pic device and free it's memory */
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &s->device);
+ kfree(s);
+}
+
+static struct kvm_device_ops kvm_pch_pic_dev_ops = {
+ .name = "kvm-loongarch-pch-pic",
+ .create = kvm_pch_pic_create,
+ .destroy = kvm_pch_pic_destroy,
+ .set_attr = kvm_pch_pic_set_attr,
+ .get_attr = kvm_pch_pic_get_attr,
+};
+
+int kvm_loongarch_register_pch_pic_device(void)
+{
+ return kvm_register_device_ops(&kvm_pch_pic_dev_ops, KVM_DEV_TYPE_LOONGARCH_PCHPIC);
+}
diff --git a/arch/loongarch/kvm/interrupt.c b/arch/loongarch/kvm/interrupt.c
new file mode 100644
index 000000000000..4c3f22de4b40
--- /dev/null
+++ b/arch/loongarch/kvm/interrupt.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <asm/kvm_csr.h>
+#include <asm/kvm_vcpu.h>
+
+static unsigned int priority_to_irq[EXCCODE_INT_NUM] = {
+ [INT_TI] = CPU_TIMER,
+ [INT_IPI] = CPU_IPI,
+ [INT_SWI0] = CPU_SIP0,
+ [INT_SWI1] = CPU_SIP1,
+ [INT_HWI0] = CPU_IP0,
+ [INT_HWI1] = CPU_IP1,
+ [INT_HWI2] = CPU_IP2,
+ [INT_HWI3] = CPU_IP3,
+ [INT_HWI4] = CPU_IP4,
+ [INT_HWI5] = CPU_IP5,
+ [INT_HWI6] = CPU_IP6,
+ [INT_HWI7] = CPU_IP7,
+};
+
+static int kvm_irq_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+ unsigned int irq = 0;
+
+ clear_bit(priority, &vcpu->arch.irq_pending);
+ if (priority < EXCCODE_INT_NUM)
+ irq = priority_to_irq[priority];
+
+ switch (priority) {
+ case INT_TI:
+ case INT_IPI:
+ case INT_SWI0:
+ case INT_SWI1:
+ set_gcsr_estat(irq);
+ break;
+
+ case INT_HWI0 ... INT_HWI7:
+ set_csr_gintc(irq);
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int kvm_irq_clear(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+ unsigned int irq = 0;
+
+ clear_bit(priority, &vcpu->arch.irq_clear);
+ if (priority < EXCCODE_INT_NUM)
+ irq = priority_to_irq[priority];
+
+ switch (priority) {
+ case INT_TI:
+ case INT_IPI:
+ case INT_SWI0:
+ case INT_SWI1:
+ clear_gcsr_estat(irq);
+ break;
+
+ case INT_HWI0 ... INT_HWI7:
+ clear_csr_gintc(irq);
+ break;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+void kvm_deliver_intr(struct kvm_vcpu *vcpu)
+{
+ unsigned int priority;
+ unsigned long *pending = &vcpu->arch.irq_pending;
+ unsigned long *pending_clr = &vcpu->arch.irq_clear;
+
+ if (!(*pending) && !(*pending_clr))
+ return;
+
+ if (*pending_clr) {
+ priority = __ffs(*pending_clr);
+ while (priority <= INT_IPI) {
+ kvm_irq_clear(vcpu, priority);
+ priority = find_next_bit(pending_clr,
+ BITS_PER_BYTE * sizeof(*pending_clr),
+ priority + 1);
+ }
+ }
+
+ if (*pending) {
+ priority = __ffs(*pending);
+ while (priority <= INT_IPI) {
+ kvm_irq_deliver(vcpu, priority);
+ priority = find_next_bit(pending,
+ BITS_PER_BYTE * sizeof(*pending),
+ priority + 1);
+ }
+ }
+}
+
+int kvm_pending_timer(struct kvm_vcpu *vcpu)
+{
+ return test_bit(INT_TI, &vcpu->arch.irq_pending);
+}
+
+/*
+ * Only support illegal instruction or illegal Address Error exception,
+ * Other exceptions are injected by hardware in kvm mode
+ */
+static void _kvm_deliver_exception(struct kvm_vcpu *vcpu,
+ unsigned int code, unsigned int subcode)
+{
+ unsigned long val, vec_size;
+
+ /*
+ * BADV is added for EXCCODE_ADE exception
+ * Use PC register (GVA address) if it is instruction exeception
+ * Else use BADV from host side (GPA address) for data exeception
+ */
+ if (code == EXCCODE_ADE) {
+ if (subcode == EXSUBCODE_ADEF)
+ val = vcpu->arch.pc;
+ else
+ val = vcpu->arch.badv;
+ kvm_write_hw_gcsr(LOONGARCH_CSR_BADV, val);
+ }
+
+ /* Set exception instruction */
+ kvm_write_hw_gcsr(LOONGARCH_CSR_BADI, vcpu->arch.badi);
+
+ /*
+ * Save CRMD in PRMD
+ * Set IRQ disabled and PLV0 with CRMD
+ */
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD);
+ kvm_write_hw_gcsr(LOONGARCH_CSR_PRMD, val);
+ val = val & ~(CSR_CRMD_PLV | CSR_CRMD_IE);
+ kvm_write_hw_gcsr(LOONGARCH_CSR_CRMD, val);
+
+ /* Set exception PC address */
+ kvm_write_hw_gcsr(LOONGARCH_CSR_ERA, vcpu->arch.pc);
+
+ /*
+ * Set exception code
+ * Exception and interrupt can be inject at the same time
+ * Hardware will handle exception first and then extern interrupt
+ * Exception code is Ecode in ESTAT[16:21]
+ * Interrupt code in ESTAT[0:12]
+ */
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT);
+ val = (val & ~CSR_ESTAT_EXC) | code;
+ kvm_write_hw_gcsr(LOONGARCH_CSR_ESTAT, val);
+
+ /* Calculate expcetion entry address */
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_ECFG);
+ vec_size = (val & CSR_ECFG_VS) >> CSR_ECFG_VS_SHIFT;
+ if (vec_size)
+ vec_size = (1 << vec_size) * 4;
+ val = kvm_read_hw_gcsr(LOONGARCH_CSR_EENTRY);
+ vcpu->arch.pc = val + code * vec_size;
+}
+
+void kvm_deliver_exception(struct kvm_vcpu *vcpu)
+{
+ unsigned int code;
+ unsigned long *pending = &vcpu->arch.exception_pending;
+
+ if (*pending) {
+ code = __ffs(*pending);
+ _kvm_deliver_exception(vcpu, code, vcpu->arch.esubcode);
+ *pending = 0;
+ vcpu->arch.esubcode = 0;
+ }
+}
diff --git a/arch/loongarch/kvm/irqfd.c b/arch/loongarch/kvm/irqfd.c
new file mode 100644
index 000000000000..9a39627aecf0
--- /dev/null
+++ b/arch/loongarch/kvm/irqfd.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
+#include <asm/kvm_pch_pic.h>
+
+static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+ /* PCH-PIC pin (0 ~ 64) <---> GSI (0 ~ 64) */
+ pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level);
+
+ return 0;
+}
+
+/*
+ * kvm_set_msi: inject the MSI corresponding to the
+ * MSI routing entry
+ *
+ * This is the entry point for irqfd MSI injection
+ * and userspace MSI injection.
+ */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+ if (!level)
+ return -1;
+
+ pch_msi_set_irq(kvm, e->msi.data, level);
+
+ return 0;
+}
+
+/*
+ * kvm_set_routing_entry: populate a kvm routing entry
+ * from a user routing entry
+ *
+ * @kvm: the VM this entry is applied to
+ * @e: kvm kernel routing entry handle
+ * @ue: user api routing entry handle
+ * return 0 on success, -EINVAL on errors.
+ */
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
+ const struct kvm_irq_routing_entry *ue)
+{
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ e->set = kvm_set_pic_irq;
+ e->irqchip.irqchip = ue->u.irqchip.irqchip;
+ e->irqchip.pin = ue->u.irqchip.pin;
+
+ if (e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS)
+ return -EINVAL;
+
+ return 0;
+ case KVM_IRQ_ROUTING_MSI:
+ e->set = kvm_set_msi;
+ e->msi.address_lo = ue->u.msi.address_lo;
+ e->msi.address_hi = ue->u.msi.address_hi;
+ e->msi.data = ue->u.msi.data;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level, bool line_status)
+{
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ pch_pic_set_irq(kvm->arch.pch_pic, e->irqchip.pin, level);
+ return 0;
+ case KVM_IRQ_ROUTING_MSI:
+ pch_msi_set_irq(kvm, e->msi.data, level);
+ return 0;
+ default:
+ return -EWOULDBLOCK;
+ }
+}
+
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+ return kvm_arch_irqchip_in_kernel(kvm);
+}
diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c
new file mode 100644
index 000000000000..80ea63d465b8
--- /dev/null
+++ b/arch/loongarch/kvm/main.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/kvm_csr.h>
+#include <asm/kvm_eiointc.h>
+#include <asm/kvm_pch_pic.h>
+#include "trace.h"
+
+unsigned long vpid_mask;
+struct kvm_world_switch *kvm_loongarch_ops;
+static int gcsr_flag[CSR_MAX_NUMS];
+static struct kvm_context __percpu *vmcs;
+
+int get_gcsr_flag(int csr)
+{
+ if (csr < CSR_MAX_NUMS)
+ return gcsr_flag[csr];
+
+ return INVALID_GCSR;
+}
+
+static inline void set_gcsr_sw_flag(int csr)
+{
+ if (csr < CSR_MAX_NUMS)
+ gcsr_flag[csr] |= SW_GCSR;
+}
+
+static inline void set_gcsr_hw_flag(int csr)
+{
+ if (csr < CSR_MAX_NUMS)
+ gcsr_flag[csr] |= HW_GCSR;
+}
+
+/*
+ * The default value of gcsr_flag[CSR] is 0, and we use this
+ * function to set the flag to 1 (SW_GCSR) or 2 (HW_GCSR) if the
+ * gcsr is software or hardware. It will be used by get/set_gcsr,
+ * if gcsr_flag is HW we should use gcsrrd/gcsrwr to access it,
+ * else use software csr to emulate it.
+ */
+static void kvm_init_gcsr_flag(void)
+{
+ set_gcsr_hw_flag(LOONGARCH_CSR_CRMD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRMD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_EUEN);
+ set_gcsr_hw_flag(LOONGARCH_CSR_MISC);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ECFG);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ESTAT);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ERA);
+ set_gcsr_hw_flag(LOONGARCH_CSR_BADV);
+ set_gcsr_hw_flag(LOONGARCH_CSR_BADI);
+ set_gcsr_hw_flag(LOONGARCH_CSR_EENTRY);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBIDX);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBEHI);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBELO1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_ASID);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PGDL);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PGDH);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PGD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PWCTL1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_STLBPGSIZE);
+ set_gcsr_hw_flag(LOONGARCH_CSR_RVACFG);
+ set_gcsr_hw_flag(LOONGARCH_CSR_CPUID);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG2);
+ set_gcsr_hw_flag(LOONGARCH_CSR_PRCFG3);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS2);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS3);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS4);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS5);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS6);
+ set_gcsr_hw_flag(LOONGARCH_CSR_KS7);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TMID);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TCFG);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TVAL);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TINTCLR);
+ set_gcsr_hw_flag(LOONGARCH_CSR_CNTC);
+ set_gcsr_hw_flag(LOONGARCH_CSR_LLBCTL);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRENTRY);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRBADV);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRERA);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRSAVE);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRELO1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBREHI);
+ set_gcsr_hw_flag(LOONGARCH_CSR_TLBRPRMD);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN0);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN1);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN2);
+ set_gcsr_hw_flag(LOONGARCH_CSR_DMWIN3);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IMPCTL2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRCTL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRINFO2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRENTRY);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRERA);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MERRSAVE);
+ set_gcsr_sw_flag(LOONGARCH_CSR_CTAG);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DEBUG);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DERA);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DESAVE);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_FWPC);
+ set_gcsr_sw_flag(LOONGARCH_CSR_FWPS);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MWPC);
+ set_gcsr_sw_flag(LOONGARCH_CSR_MWPS);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB0ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB1ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB2ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB3ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB4ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB5ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB6ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_DB7ASID);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB0ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB1ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB2ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB3ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB4ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB5ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB6ASID);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7ADDR);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7MASK);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7CTRL);
+ set_gcsr_sw_flag(LOONGARCH_CSR_IB7ASID);
+
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL0);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR0);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR1);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR2);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCTRL3);
+ set_gcsr_sw_flag(LOONGARCH_CSR_PERFCNTR3);
+}
+
+static void kvm_update_vpid(struct kvm_vcpu *vcpu, int cpu)
+{
+ unsigned long vpid;
+ struct kvm_context *context;
+
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ vpid = context->vpid_cache + 1;
+ if (!(vpid & vpid_mask)) {
+ /* finish round of vpid loop */
+ if (unlikely(!vpid))
+ vpid = vpid_mask + 1;
+
+ ++vpid; /* vpid 0 reserved for root */
+
+ /* start new vpid cycle */
+ kvm_flush_tlb_all();
+ }
+
+ context->vpid_cache = vpid;
+ vcpu->arch.vpid = vpid;
+}
+
+void kvm_check_vpid(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ bool migrated;
+ unsigned long ver, old, vpid;
+ struct kvm_context *context;
+
+ cpu = smp_processor_id();
+ /*
+ * Are we entering guest context on a different CPU to last time?
+ * If so, the vCPU's guest TLB state on this CPU may be stale.
+ */
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ migrated = (vcpu->cpu != cpu);
+
+ /*
+ * Check if our vpid is of an older version
+ *
+ * We also discard the stored vpid if we've executed on
+ * another CPU, as the guest mappings may have changed without
+ * hypervisor knowledge.
+ */
+ ver = vcpu->arch.vpid & ~vpid_mask;
+ old = context->vpid_cache & ~vpid_mask;
+ if (migrated || (ver != old)) {
+ kvm_update_vpid(vcpu, cpu);
+ trace_kvm_vpid_change(vcpu, vcpu->arch.vpid);
+ vcpu->cpu = cpu;
+ kvm_clear_request(KVM_REQ_TLB_FLUSH_GPA, vcpu);
+
+ /*
+ * LLBCTL is a separated guest CSR register from host, a general
+ * exception ERET instruction clears the host LLBCTL register in
+ * host mode, and clears the guest LLBCTL register in guest mode.
+ * ERET in tlb refill exception does not clear LLBCTL register.
+ *
+ * When secondary mmu mapping is changed, guest OS does not know
+ * even if the content is changed after mapping is changed.
+ *
+ * Here clear WCLLB of the guest LLBCTL register when mapping is
+ * changed. Otherwise, if mmu mapping is changed while guest is
+ * executing LL/SC pair, LL loads with the old address and set
+ * the LLBCTL flag, SC checks the LLBCTL flag and will store the
+ * new address successfully since LLBCTL_WCLLB is on, even if
+ * memory with new address is changed on other VCPUs.
+ */
+ set_gcsr_llbctl(CSR_LLBCTL_WCLLB);
+ }
+
+ /* Restore GSTAT(0x50).vpid */
+ vpid = (vcpu->arch.vpid & vpid_mask) << CSR_GSTAT_GID_SHIFT;
+ change_csr_gstat(vpid_mask << CSR_GSTAT_GID_SHIFT, vpid);
+}
+
+void kvm_init_vmcs(struct kvm *kvm)
+{
+ kvm->arch.vmcs = vmcs;
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_enable_virtualization_cpu(void)
+{
+ unsigned long env, gcfg = 0;
+
+ env = read_csr_gcfg();
+
+ /* First init gcfg, gstat, gintc, gtlbc. All guest use the same config */
+ write_csr_gcfg(0);
+ write_csr_gstat(0);
+ write_csr_gintc(0);
+ clear_csr_gtlbc(CSR_GTLBC_USETGID | CSR_GTLBC_TOTI);
+
+ /*
+ * Enable virtualization features granting guest direct control of
+ * certain features:
+ * GCI=2: Trap on init or unimplemented cache instruction.
+ * TORU=0: Trap on Root Unimplement.
+ * CACTRL=1: Root control cache.
+ * TOP=0: Trap on Privilege.
+ * TOE=0: Trap on Exception.
+ * TIT=0: Trap on Timer.
+ */
+ if (env & CSR_GCFG_GCIP_SECURE)
+ gcfg |= CSR_GCFG_GCI_SECURE;
+ if (env & CSR_GCFG_MATP_ROOT)
+ gcfg |= CSR_GCFG_MATC_ROOT;
+
+ write_csr_gcfg(gcfg);
+
+ kvm_flush_tlb_all();
+
+ /* Enable using TGID */
+ set_csr_gtlbc(CSR_GTLBC_USETGID);
+ kvm_debug("GCFG:%lx GSTAT:%lx GINTC:%lx GTLBC:%lx",
+ read_csr_gcfg(), read_csr_gstat(), read_csr_gintc(), read_csr_gtlbc());
+
+ /*
+ * HW Guest CSR registers are lost after CPU suspend and resume.
+ * Clear last_vcpu so that Guest CSR registers forced to reload
+ * from vCPU SW state.
+ */
+ this_cpu_ptr(vmcs)->last_vcpu = NULL;
+
+ return 0;
+}
+
+void kvm_arch_disable_virtualization_cpu(void)
+{
+ write_csr_gcfg(0);
+ write_csr_gstat(0);
+ write_csr_gintc(0);
+ clear_csr_gtlbc(CSR_GTLBC_USETGID | CSR_GTLBC_TOTI);
+
+ /* Flush any remaining guest TLB entries */
+ kvm_flush_tlb_all();
+}
+
+static int kvm_loongarch_env_init(void)
+{
+ int cpu, order, ret;
+ void *addr;
+ struct kvm_context *context;
+
+ vmcs = alloc_percpu(struct kvm_context);
+ if (!vmcs) {
+ pr_err("kvm: failed to allocate percpu kvm_context\n");
+ return -ENOMEM;
+ }
+
+ kvm_loongarch_ops = kzalloc(sizeof(*kvm_loongarch_ops), GFP_KERNEL);
+ if (!kvm_loongarch_ops) {
+ free_percpu(vmcs);
+ vmcs = NULL;
+ return -ENOMEM;
+ }
+
+ /*
+ * PGD register is shared between root kernel and kvm hypervisor.
+ * So world switch entry should be in DMW area rather than TLB area
+ * to avoid page fault reenter.
+ *
+ * In future if hardware pagetable walking is supported, we won't
+ * need to copy world switch code to DMW area.
+ */
+ order = get_order(kvm_exception_size + kvm_enter_guest_size);
+ addr = (void *)__get_free_pages(GFP_KERNEL, order);
+ if (!addr) {
+ free_percpu(vmcs);
+ vmcs = NULL;
+ kfree(kvm_loongarch_ops);
+ kvm_loongarch_ops = NULL;
+ return -ENOMEM;
+ }
+
+ memcpy(addr, kvm_exc_entry, kvm_exception_size);
+ memcpy(addr + kvm_exception_size, kvm_enter_guest, kvm_enter_guest_size);
+ flush_icache_range((unsigned long)addr, (unsigned long)addr + kvm_exception_size + kvm_enter_guest_size);
+ kvm_loongarch_ops->exc_entry = addr;
+ kvm_loongarch_ops->enter_guest = addr + kvm_exception_size;
+ kvm_loongarch_ops->page_order = order;
+
+ vpid_mask = read_csr_gstat();
+ vpid_mask = (vpid_mask & CSR_GSTAT_GIDBIT) >> CSR_GSTAT_GIDBIT_SHIFT;
+ if (vpid_mask)
+ vpid_mask = GENMASK(vpid_mask - 1, 0);
+
+ for_each_possible_cpu(cpu) {
+ context = per_cpu_ptr(vmcs, cpu);
+ context->vpid_cache = vpid_mask + 1;
+ context->last_vcpu = NULL;
+ }
+
+ kvm_init_gcsr_flag();
+ kvm_register_perf_callbacks(NULL);
+
+ /* Register LoongArch IPI interrupt controller interface. */
+ ret = kvm_loongarch_register_ipi_device();
+ if (ret)
+ return ret;
+
+ /* Register LoongArch EIOINTC interrupt controller interface. */
+ ret = kvm_loongarch_register_eiointc_device();
+ if (ret)
+ return ret;
+
+ /* Register LoongArch PCH-PIC interrupt controller interface. */
+ ret = kvm_loongarch_register_pch_pic_device();
+
+ return ret;
+}
+
+static void kvm_loongarch_env_exit(void)
+{
+ unsigned long addr;
+
+ if (vmcs)
+ free_percpu(vmcs);
+
+ if (kvm_loongarch_ops) {
+ if (kvm_loongarch_ops->exc_entry) {
+ addr = (unsigned long)kvm_loongarch_ops->exc_entry;
+ free_pages(addr, kvm_loongarch_ops->page_order);
+ }
+ kfree(kvm_loongarch_ops);
+ }
+
+ kvm_unregister_perf_callbacks();
+}
+
+static int kvm_loongarch_init(void)
+{
+ int r;
+
+ if (!cpu_has_lvz) {
+ kvm_info("Hardware virtualization not available\n");
+ return -ENODEV;
+ }
+ r = kvm_loongarch_env_init();
+ if (r)
+ return r;
+
+ return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+}
+
+static void kvm_loongarch_exit(void)
+{
+ kvm_exit();
+ kvm_loongarch_env_exit();
+}
+
+module_init(kvm_loongarch_init);
+module_exit(kvm_loongarch_exit);
+
+#ifdef MODULE
+static const struct cpu_feature kvm_feature[] = {
+ { .feature = cpu_feature(LOONGARCH_LVZ) },
+ {},
+};
+MODULE_DEVICE_TABLE(cpu, kvm_feature);
+#endif
diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c
new file mode 100644
index 000000000000..ed956c5cf2cc
--- /dev/null
+++ b/arch/loongarch/kvm/mmu.c
@@ -0,0 +1,947 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/kvm_host.h>
+#include <linux/page-flags.h>
+#include <linux/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/kvm_mmu.h>
+
+static inline bool kvm_hugepage_capable(struct kvm_memory_slot *slot)
+{
+ return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE;
+}
+
+static inline bool kvm_hugepage_incapable(struct kvm_memory_slot *slot)
+{
+ return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE;
+}
+
+static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx)
+{
+ ctx->level = kvm->arch.root_level;
+ /* pte table */
+ ctx->invalid_ptes = kvm->arch.invalid_ptes;
+ ctx->pte_shifts = kvm->arch.pte_shifts;
+ ctx->pgtable_shift = ctx->pte_shifts[ctx->level];
+ ctx->invalid_entry = ctx->invalid_ptes[ctx->level];
+ ctx->opaque = kvm;
+}
+
+/*
+ * Mark a range of guest physical address space old (all accesses fault) in the
+ * VM's GPA page table to allow detection of commonly used pages.
+ */
+static int kvm_mkold_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx)
+{
+ if (kvm_pte_young(*pte)) {
+ *pte = kvm_pte_mkold(*pte);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Mark a range of guest physical address space clean (writes fault) in the VM's
+ * GPA page table to allow dirty page tracking.
+ */
+static int kvm_mkclean_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx)
+{
+ gfn_t offset;
+ kvm_pte_t val;
+
+ val = *pte;
+ /*
+ * For kvm_arch_mmu_enable_log_dirty_pt_masked with mask, start and end
+ * may cross hugepage, for first huge page parameter addr is equal to
+ * start, however for the second huge page addr is base address of
+ * this huge page, rather than start or end address
+ */
+ if ((ctx->flag & _KVM_HAS_PGMASK) && !kvm_pte_huge(val)) {
+ offset = (addr >> PAGE_SHIFT) - ctx->gfn;
+ if (!(BIT(offset) & ctx->mask))
+ return 0;
+ }
+
+ /*
+ * Need not split huge page now, just set write-proect pte bit
+ * Split huge page until next write fault
+ */
+ if (kvm_pte_dirty(val)) {
+ *pte = kvm_pte_mkclean(val);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Clear pte entry
+ */
+static int kvm_flush_pte(kvm_pte_t *pte, phys_addr_t addr, kvm_ptw_ctx *ctx)
+{
+ struct kvm *kvm;
+
+ kvm = ctx->opaque;
+ if (ctx->level)
+ kvm->stat.hugepages--;
+ else
+ kvm->stat.pages--;
+
+ *pte = ctx->invalid_entry;
+
+ return 1;
+}
+
+/*
+ * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
+ *
+ * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
+ * to host physical page mappings.
+ *
+ * Returns: Pointer to new KVM GPA page directory.
+ * NULL on allocation failure.
+ */
+kvm_pte_t *kvm_pgd_alloc(void)
+{
+ kvm_pte_t *pgd;
+
+ pgd = (kvm_pte_t *)__get_free_pages(GFP_KERNEL, 0);
+ if (pgd)
+ pgd_init((void *)pgd);
+
+ return pgd;
+}
+
+static void _kvm_pte_init(void *addr, unsigned long val)
+{
+ unsigned long *p, *end;
+
+ p = (unsigned long *)addr;
+ end = p + PTRS_PER_PTE;
+ do {
+ p[0] = val;
+ p[1] = val;
+ p[2] = val;
+ p[3] = val;
+ p[4] = val;
+ p += 8;
+ p[-3] = val;
+ p[-2] = val;
+ p[-1] = val;
+ } while (p != end);
+}
+
+/*
+ * Caller must hold kvm->mm_lock
+ *
+ * Walk the page tables of kvm to find the PTE corresponding to the
+ * address @addr. If page tables don't exist for @addr, they will be created
+ * from the MMU cache if @cache is not NULL.
+ */
+static kvm_pte_t *kvm_populate_gpa(struct kvm *kvm,
+ struct kvm_mmu_memory_cache *cache,
+ unsigned long addr, int level)
+{
+ kvm_ptw_ctx ctx;
+ kvm_pte_t *entry, *child;
+
+ kvm_ptw_prepare(kvm, &ctx);
+ child = kvm->arch.pgd;
+ while (ctx.level > level) {
+ entry = kvm_pgtable_offset(&ctx, child, addr);
+ if (kvm_pte_none(&ctx, entry)) {
+ if (!cache)
+ return NULL;
+
+ child = kvm_mmu_memory_cache_alloc(cache);
+ _kvm_pte_init(child, ctx.invalid_ptes[ctx.level - 1]);
+ smp_wmb(); /* Make pte visible before pmd */
+ kvm_set_pte(entry, __pa(child));
+ } else if (kvm_pte_huge(*entry)) {
+ return entry;
+ } else
+ child = (kvm_pte_t *)__va(PHYSADDR(*entry));
+ kvm_ptw_enter(&ctx);
+ }
+
+ entry = kvm_pgtable_offset(&ctx, child, addr);
+
+ return entry;
+}
+
+/*
+ * Page walker for VM shadow mmu at last level
+ * The last level is small pte page or huge pmd page
+ */
+static int kvm_ptw_leaf(kvm_pte_t *dir, phys_addr_t addr, phys_addr_t end, kvm_ptw_ctx *ctx)
+{
+ int ret;
+ phys_addr_t next, start, size;
+ struct list_head *list;
+ kvm_pte_t *entry, *child;
+
+ ret = 0;
+ start = addr;
+ child = (kvm_pte_t *)__va(PHYSADDR(*dir));
+ entry = kvm_pgtable_offset(ctx, child, addr);
+ do {
+ next = addr + (0x1UL << ctx->pgtable_shift);
+ if (!kvm_pte_present(ctx, entry))
+ continue;
+
+ ret |= ctx->ops(entry, addr, ctx);
+ } while (entry++, addr = next, addr < end);
+
+ if (kvm_need_flush(ctx)) {
+ size = 0x1UL << (ctx->pgtable_shift + PAGE_SHIFT - 3);
+ if (start + size == end) {
+ list = (struct list_head *)child;
+ list_add_tail(list, &ctx->list);
+ *dir = ctx->invalid_ptes[ctx->level + 1];
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Page walker for VM shadow mmu at page table dir level
+ */
+static int kvm_ptw_dir(kvm_pte_t *dir, phys_addr_t addr, phys_addr_t end, kvm_ptw_ctx *ctx)
+{
+ int ret;
+ phys_addr_t next, start, size;
+ struct list_head *list;
+ kvm_pte_t *entry, *child;
+
+ ret = 0;
+ start = addr;
+ child = (kvm_pte_t *)__va(PHYSADDR(*dir));
+ entry = kvm_pgtable_offset(ctx, child, addr);
+ do {
+ next = kvm_pgtable_addr_end(ctx, addr, end);
+ if (!kvm_pte_present(ctx, entry))
+ continue;
+
+ if (kvm_pte_huge(*entry)) {
+ ret |= ctx->ops(entry, addr, ctx);
+ continue;
+ }
+
+ kvm_ptw_enter(ctx);
+ if (ctx->level == 0)
+ ret |= kvm_ptw_leaf(entry, addr, next, ctx);
+ else
+ ret |= kvm_ptw_dir(entry, addr, next, ctx);
+ kvm_ptw_exit(ctx);
+ } while (entry++, addr = next, addr < end);
+
+ if (kvm_need_flush(ctx)) {
+ size = 0x1UL << (ctx->pgtable_shift + PAGE_SHIFT - 3);
+ if (start + size == end) {
+ list = (struct list_head *)child;
+ list_add_tail(list, &ctx->list);
+ *dir = ctx->invalid_ptes[ctx->level + 1];
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Page walker for VM shadow mmu at page root table
+ */
+static int kvm_ptw_top(kvm_pte_t *dir, phys_addr_t addr, phys_addr_t end, kvm_ptw_ctx *ctx)
+{
+ int ret;
+ phys_addr_t next;
+ kvm_pte_t *entry;
+
+ ret = 0;
+ entry = kvm_pgtable_offset(ctx, dir, addr);
+ do {
+ next = kvm_pgtable_addr_end(ctx, addr, end);
+ if (!kvm_pte_present(ctx, entry))
+ continue;
+
+ kvm_ptw_enter(ctx);
+ ret |= kvm_ptw_dir(entry, addr, next, ctx);
+ kvm_ptw_exit(ctx);
+ } while (entry++, addr = next, addr < end);
+
+ return ret;
+}
+
+/*
+ * kvm_flush_range() - Flush a range of guest physical addresses.
+ * @kvm: KVM pointer.
+ * @start_gfn: Guest frame number of first page in GPA range to flush.
+ * @end_gfn: Guest frame number of last page in GPA range to flush.
+ * @lock: Whether to hold mmu_lock or not
+ *
+ * Flushes a range of GPA mappings from the GPA page tables.
+ */
+static void kvm_flush_range(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn, int lock)
+{
+ int ret;
+ kvm_ptw_ctx ctx;
+ struct list_head *pos, *temp;
+
+ ctx.ops = kvm_flush_pte;
+ ctx.flag = _KVM_FLUSH_PGTABLE;
+ kvm_ptw_prepare(kvm, &ctx);
+ INIT_LIST_HEAD(&ctx.list);
+
+ if (lock) {
+ spin_lock(&kvm->mmu_lock);
+ ret = kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT,
+ end_gfn << PAGE_SHIFT, &ctx);
+ spin_unlock(&kvm->mmu_lock);
+ } else
+ ret = kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT,
+ end_gfn << PAGE_SHIFT, &ctx);
+
+ /* Flush vpid for each vCPU individually */
+ if (ret)
+ kvm_flush_remote_tlbs(kvm);
+
+ /*
+ * free pte table page after mmu_lock
+ * the pte table page is linked together with ctx.list
+ */
+ list_for_each_safe(pos, temp, &ctx.list) {
+ list_del(pos);
+ free_page((unsigned long)pos);
+ }
+}
+
+/*
+ * kvm_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
+ * @kvm: KVM pointer.
+ * @start_gfn: Guest frame number of first page in GPA range to flush.
+ * @end_gfn: Guest frame number of last page in GPA range to flush.
+ *
+ * Make a range of GPA mappings clean so that guest writes will fault and
+ * trigger dirty page logging.
+ *
+ * The caller must hold the @kvm->mmu_lock spinlock.
+ *
+ * Returns: Whether any GPA mappings were modified, which would require
+ * derived mappings (GVA page tables & TLB enties) to be
+ * invalidated.
+ */
+static int kvm_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn)
+{
+ kvm_ptw_ctx ctx;
+
+ ctx.ops = kvm_mkclean_pte;
+ ctx.flag = 0;
+ kvm_ptw_prepare(kvm, &ctx);
+ return kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT, end_gfn << PAGE_SHIFT, &ctx);
+}
+
+/*
+ * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
+ * @kvm: The KVM pointer
+ * @slot: The memory slot associated with mask
+ * @gfn_offset: The gfn offset in memory slot
+ * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory
+ * slot to be write protected
+ *
+ * Walks bits set in mask write protects the associated pte's. Caller must
+ * acquire @kvm->mmu_lock.
+ */
+void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask)
+{
+ kvm_ptw_ctx ctx;
+ gfn_t base_gfn = slot->base_gfn + gfn_offset;
+ gfn_t start = base_gfn + __ffs(mask);
+ gfn_t end = base_gfn + __fls(mask) + 1;
+
+ ctx.ops = kvm_mkclean_pte;
+ ctx.flag = _KVM_HAS_PGMASK;
+ ctx.mask = mask;
+ ctx.gfn = base_gfn;
+ kvm_ptw_prepare(kvm, &ctx);
+
+ kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx);
+}
+
+int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new, enum kvm_mr_change change)
+{
+ gpa_t gpa_start;
+ hva_t hva_start;
+ size_t size, gpa_offset, hva_offset;
+
+ if ((change != KVM_MR_MOVE) && (change != KVM_MR_CREATE))
+ return 0;
+ /*
+ * Prevent userspace from creating a memory region outside of the
+ * VM GPA address space
+ */
+ if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT))
+ return -ENOMEM;
+
+ new->arch.flags = 0;
+ size = new->npages * PAGE_SIZE;
+ gpa_start = new->base_gfn << PAGE_SHIFT;
+ hva_start = new->userspace_addr;
+ if (IS_ALIGNED(size, PMD_SIZE) && IS_ALIGNED(gpa_start, PMD_SIZE)
+ && IS_ALIGNED(hva_start, PMD_SIZE))
+ new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE;
+ else {
+ /*
+ * Pages belonging to memslots that don't have the same
+ * alignment within a PMD for userspace and GPA cannot be
+ * mapped with PMD entries, because we'll end up mapping
+ * the wrong pages.
+ *
+ * Consider a layout like the following:
+ *
+ * memslot->userspace_addr:
+ * +-----+--------------------+--------------------+---+
+ * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz|
+ * +-----+--------------------+--------------------+---+
+ *
+ * memslot->base_gfn << PAGE_SIZE:
+ * +---+--------------------+--------------------+-----+
+ * |abc|def Stage-2 block | Stage-2 block |tvxyz|
+ * +---+--------------------+--------------------+-----+
+ *
+ * If we create those stage-2 blocks, we'll end up with this
+ * incorrect mapping:
+ * d -> f
+ * e -> g
+ * f -> h
+ */
+ gpa_offset = gpa_start & (PMD_SIZE - 1);
+ hva_offset = hva_start & (PMD_SIZE - 1);
+ if (gpa_offset != hva_offset) {
+ new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
+ } else {
+ if (gpa_offset == 0)
+ gpa_offset = PMD_SIZE;
+ if ((size + gpa_offset) < (PMD_SIZE * 2))
+ new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE;
+ }
+ }
+
+ return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
+{
+ int needs_flush;
+ u32 old_flags = old ? old->flags : 0;
+ u32 new_flags = new ? new->flags : 0;
+ bool log_dirty_pages = new_flags & KVM_MEM_LOG_DIRTY_PAGES;
+
+ /* Only track memslot flags changed */
+ if (change != KVM_MR_FLAGS_ONLY)
+ return;
+
+ /* Discard dirty page tracking on readonly memslot */
+ if ((old_flags & new_flags) & KVM_MEM_READONLY)
+ return;
+
+ /*
+ * If dirty page logging is enabled, write protect all pages in the slot
+ * ready for dirty logging.
+ *
+ * There is no need to do this in any of the following cases:
+ * CREATE: No dirty mappings will already exist.
+ * MOVE/DELETE: The old mappings will already have been cleaned up by
+ * kvm_arch_flush_shadow_memslot()
+ */
+ if (!(old_flags & KVM_MEM_LOG_DIRTY_PAGES) && log_dirty_pages) {
+ /*
+ * Initially-all-set does not require write protecting any page
+ * because they're all assumed to be dirty.
+ */
+ if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+ return;
+
+ spin_lock(&kvm->mmu_lock);
+ /* Write protect GPA page table entries */
+ needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn,
+ new->base_gfn + new->npages);
+ spin_unlock(&kvm->mmu_lock);
+ if (needs_flush)
+ kvm_flush_remote_tlbs(kvm);
+ }
+}
+
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+ kvm_flush_range(kvm, 0, kvm->arch.gpa_size >> PAGE_SHIFT, 0);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+ /*
+ * The slot has been made invalid (ready for moving or deletion), so we
+ * need to ensure that it can no longer be accessed by any guest vCPUs.
+ */
+ kvm_flush_range(kvm, slot->base_gfn, slot->base_gfn + slot->npages, 1);
+}
+
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ kvm_ptw_ctx ctx;
+
+ ctx.flag = 0;
+ ctx.ops = kvm_flush_pte;
+ kvm_ptw_prepare(kvm, &ctx);
+ INIT_LIST_HEAD(&ctx.list);
+
+ return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT,
+ range->end << PAGE_SHIFT, &ctx);
+}
+
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ kvm_ptw_ctx ctx;
+
+ ctx.flag = 0;
+ ctx.ops = kvm_mkold_pte;
+ kvm_ptw_prepare(kvm, &ctx);
+
+ return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT,
+ range->end << PAGE_SHIFT, &ctx);
+}
+
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ gpa_t gpa = range->start << PAGE_SHIFT;
+ kvm_pte_t *ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
+
+ if (ptep && kvm_pte_present(NULL, ptep) && kvm_pte_young(*ptep))
+ return true;
+
+ return false;
+}
+
+/*
+ * kvm_map_page_fast() - Fast path GPA fault handler.
+ * @vcpu: vCPU pointer.
+ * @gpa: Guest physical address of fault.
+ * @write: Whether the fault was due to a write.
+ *
+ * Perform fast path GPA fault handling, doing all that can be done without
+ * calling into KVM. This handles marking old pages young (for idle page
+ * tracking), and dirtying of clean pages (for dirty page logging).
+ *
+ * Returns: 0 on success, in which case we can update derived mappings and
+ * resume guest execution.
+ * -EFAULT on failure due to absent GPA mapping or write to
+ * read-only page, in which case KVM must be consulted.
+ */
+static int kvm_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
+{
+ int ret = 0;
+ kvm_pte_t *ptep, changed, new;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *slot;
+
+ spin_lock(&kvm->mmu_lock);
+
+ /* Fast path - just check GPA page table for an existing entry */
+ ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
+ if (!ptep || !kvm_pte_present(NULL, ptep)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /* Track access to pages marked old */
+ new = kvm_pte_mkyoung(*ptep);
+ if (write && !kvm_pte_dirty(new)) {
+ if (!kvm_pte_write(new)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ if (kvm_pte_huge(new)) {
+ /*
+ * Do not set write permission when dirty logging is
+ * enabled for HugePages
+ */
+ slot = gfn_to_memslot(kvm, gfn);
+ if (kvm_slot_dirty_track_enabled(slot)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+
+ /* Track dirtying of writeable pages */
+ new = kvm_pte_mkdirty(new);
+ }
+
+ changed = new ^ (*ptep);
+ if (changed)
+ kvm_set_pte(ptep, new);
+
+ spin_unlock(&kvm->mmu_lock);
+
+ if (kvm_pte_dirty(changed))
+ mark_page_dirty(kvm, gfn);
+
+ return ret;
+out:
+ spin_unlock(&kvm->mmu_lock);
+ return ret;
+}
+
+static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot,
+ unsigned long hva, bool write)
+{
+ hva_t start, end;
+
+ /* Disable dirty logging on HugePages */
+ if (kvm_slot_dirty_track_enabled(memslot) && write)
+ return false;
+
+ if (kvm_hugepage_capable(memslot))
+ return true;
+
+ if (kvm_hugepage_incapable(memslot))
+ return false;
+
+ start = memslot->userspace_addr;
+ end = start + memslot->npages * PAGE_SIZE;
+
+ /*
+ * Next, let's make sure we're not trying to map anything not covered
+ * by the memslot. This means we have to prohibit block size mappings
+ * for the beginning and end of a non-block aligned and non-block sized
+ * memory slot (illustrated by the head and tail parts of the
+ * userspace view above containing pages 'abcde' and 'xyz',
+ * respectively).
+ *
+ * Note that it doesn't matter if we do the check using the
+ * userspace_addr or the base_gfn, as both are equally aligned (per
+ * the check above) and equally sized.
+ */
+ return (hva >= ALIGN(start, PMD_SIZE)) && (hva < ALIGN_DOWN(end, PMD_SIZE));
+}
+
+/*
+ * Lookup the mapping level for @gfn in the current mm.
+ *
+ * WARNING! Use of host_pfn_mapping_level() requires the caller and the end
+ * consumer to be tied into KVM's handlers for MMU notifier events!
+ *
+ * There are several ways to safely use this helper:
+ *
+ * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
+ * consuming it. In this case, mmu_lock doesn't need to be held during the
+ * lookup, but it does need to be held while checking the MMU notifier.
+ *
+ * - Hold mmu_lock AND ensure there is no in-progress MMU notifier invalidation
+ * event for the hva. This can be done by explicit checking the MMU notifier
+ * or by ensuring that KVM already has a valid mapping that covers the hva.
+ *
+ * - Do not use the result to install new mappings, e.g. use the host mapping
+ * level only to decide whether or not to zap an entry. In this case, it's
+ * not required to hold mmu_lock (though it's highly likely the caller will
+ * want to hold mmu_lock anyways, e.g. to modify SPTEs).
+ *
+ * Note! The lookup can still race with modifications to host page tables, but
+ * the above "rules" ensure KVM will not _consume_ the result of the walk if a
+ * race with the primary MMU occurs.
+ */
+static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn,
+ const struct kvm_memory_slot *slot)
+{
+ int level = 0;
+ unsigned long hva;
+ unsigned long flags;
+ pgd_t pgd;
+ p4d_t p4d;
+ pud_t pud;
+ pmd_t pmd;
+
+ /*
+ * Note, using the already-retrieved memslot and __gfn_to_hva_memslot()
+ * is not solely for performance, it's also necessary to avoid the
+ * "writable" check in __gfn_to_hva_many(), which will always fail on
+ * read-only memslots due to gfn_to_hva() assuming writes. Earlier
+ * page fault steps have already verified the guest isn't writing a
+ * read-only memslot.
+ */
+ hva = __gfn_to_hva_memslot(slot, gfn);
+
+ /*
+ * Disable IRQs to prevent concurrent tear down of host page tables,
+ * e.g. if the primary MMU promotes a P*D to a huge page and then frees
+ * the original page table.
+ */
+ local_irq_save(flags);
+
+ /*
+ * Read each entry once. As above, a non-leaf entry can be promoted to
+ * a huge page _during_ this walk. Re-reading the entry could send the
+ * walk into the weeks, e.g. p*d_leaf() returns false (sees the old
+ * value) and then p*d_offset() walks into the target huge page instead
+ * of the old page table (sees the new value).
+ */
+ pgd = pgdp_get(pgd_offset(kvm->mm, hva));
+ if (pgd_none(pgd))
+ goto out;
+
+ p4d = p4dp_get(p4d_offset(&pgd, hva));
+ if (p4d_none(p4d) || !p4d_present(p4d))
+ goto out;
+
+ pud = pudp_get(pud_offset(&p4d, hva));
+ if (pud_none(pud) || !pud_present(pud))
+ goto out;
+
+ pmd = pmdp_get(pmd_offset(&pud, hva));
+ if (pmd_none(pmd) || !pmd_present(pmd))
+ goto out;
+
+ if (kvm_pte_huge(pmd_val(pmd)))
+ level = 1;
+
+out:
+ local_irq_restore(flags);
+ return level;
+}
+
+/*
+ * Split huge page
+ */
+static kvm_pte_t *kvm_split_huge(struct kvm_vcpu *vcpu, kvm_pte_t *ptep, gfn_t gfn)
+{
+ int i;
+ kvm_pte_t val, *child;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_memory_cache *memcache;
+
+ memcache = &vcpu->arch.mmu_page_cache;
+ child = kvm_mmu_memory_cache_alloc(memcache);
+ val = kvm_pte_mksmall(*ptep);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ kvm_set_pte(child + i, val);
+ val += PAGE_SIZE;
+ }
+
+ smp_wmb(); /* Make pte visible before pmd */
+ /* The later kvm_flush_tlb_gpa() will flush hugepage tlb */
+ kvm_set_pte(ptep, __pa(child));
+
+ kvm->stat.hugepages--;
+ kvm->stat.pages += PTRS_PER_PTE;
+
+ return child + (gfn & (PTRS_PER_PTE - 1));
+}
+
+/*
+ * kvm_map_page() - Map a guest physical page.
+ * @vcpu: vCPU pointer.
+ * @gpa: Guest physical address of fault.
+ * @write: Whether the fault was due to a write.
+ *
+ * Handle GPA faults by creating a new GPA mapping (or updating an existing
+ * one).
+ *
+ * This takes care of marking pages young or dirty (idle/dirty page tracking),
+ * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
+ * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
+ * caller.
+ *
+ * Returns: 0 on success
+ * -EFAULT if there is no memory region at @gpa or a write was
+ * attempted to a read-only memory region. This is usually handled
+ * as an MMIO access.
+ */
+static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write)
+{
+ bool writeable;
+ int srcu_idx, err, retry_no = 0, level;
+ unsigned long hva, mmu_seq, prot_bits;
+ kvm_pfn_t pfn;
+ kvm_pte_t *ptep, new_pte;
+ gfn_t gfn = gpa >> PAGE_SHIFT;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *memslot;
+ struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+ struct page *page;
+
+ /* Try the fast path to handle old / clean pages */
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ err = kvm_map_page_fast(vcpu, gpa, write);
+ if (!err)
+ goto out;
+
+ memslot = gfn_to_memslot(kvm, gfn);
+ hva = gfn_to_hva_memslot_prot(memslot, gfn, &writeable);
+ if (kvm_is_error_hva(hva) || (write && !writeable)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ /* We need a minimum of cached pages ready for page table creation */
+ err = kvm_mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES);
+ if (err)
+ goto out;
+
+retry:
+ /*
+ * Used to check for invalidations in progress, of the pfn that is
+ * returned by pfn_to_pfn_prot below.
+ */
+ mmu_seq = kvm->mmu_invalidate_seq;
+ /*
+ * Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads in
+ * kvm_faultin_pfn() (which calls get_user_pages()), so that we don't
+ * risk the page we get a reference to getting unmapped before we have a
+ * chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
+ *
+ * This smp_rmb() pairs with the effective smp_wmb() of the combination
+ * of the pte_unmap_unlock() after the PTE is zapped, and the
+ * spin_lock() in kvm_mmu_invalidate_invalidate_<page|range_end>() before
+ * mmu_invalidate_seq is incremented.
+ */
+ smp_rmb();
+
+ /* Slow path - ask KVM core whether we can access this GPA */
+ pfn = kvm_faultin_pfn(vcpu, gfn, write, &writeable, &page);
+ if (is_error_noslot_pfn(pfn)) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ /* Check if an invalidation has taken place since we got pfn */
+ spin_lock(&kvm->mmu_lock);
+ if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) {
+ /*
+ * This can happen when mappings are changed asynchronously, but
+ * also synchronously if a COW is triggered by
+ * kvm_faultin_pfn().
+ */
+ spin_unlock(&kvm->mmu_lock);
+ kvm_release_page_unused(page);
+ if (retry_no > 100) {
+ retry_no = 0;
+ schedule();
+ }
+ retry_no++;
+ goto retry;
+ }
+
+ /*
+ * For emulated devices such virtio device, actual cache attribute is
+ * determined by physical machine.
+ * For pass through physical device, it should be uncachable
+ */
+ prot_bits = _PAGE_PRESENT | __READABLE;
+ if (pfn_valid(pfn))
+ prot_bits |= _CACHE_CC;
+ else
+ prot_bits |= _CACHE_SUC;
+
+ if (writeable) {
+ prot_bits |= _PAGE_WRITE;
+ if (write)
+ prot_bits |= __WRITEABLE;
+ }
+
+ /* Disable dirty logging on HugePages */
+ level = 0;
+ if (fault_supports_huge_mapping(memslot, hva, write)) {
+ /* Check page level about host mmu*/
+ level = host_pfn_mapping_level(kvm, gfn, memslot);
+ if (level == 1) {
+ /*
+ * Check page level about secondary mmu
+ * Disable hugepage if it is normal page on
+ * secondary mmu already
+ */
+ ptep = kvm_populate_gpa(kvm, NULL, gpa, 0);
+ if (ptep && !kvm_pte_huge(*ptep))
+ level = 0;
+ }
+
+ if (level == 1) {
+ gfn = gfn & ~(PTRS_PER_PTE - 1);
+ pfn = pfn & ~(PTRS_PER_PTE - 1);
+ }
+ }
+
+ /* Ensure page tables are allocated */
+ ptep = kvm_populate_gpa(kvm, memcache, gpa, level);
+ new_pte = kvm_pfn_pte(pfn, __pgprot(prot_bits));
+ if (level == 1) {
+ new_pte = kvm_pte_mkhuge(new_pte);
+ /*
+ * previous pmd entry is invalid_pte_table
+ * there is invalid tlb with small page
+ * need flush these invalid tlbs for current vcpu
+ */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ ++kvm->stat.hugepages;
+ } else if (kvm_pte_huge(*ptep) && write)
+ ptep = kvm_split_huge(vcpu, ptep, gfn);
+ else
+ ++kvm->stat.pages;
+ kvm_set_pte(ptep, new_pte);
+
+ kvm_release_faultin_page(kvm, page, false, writeable);
+ spin_unlock(&kvm->mmu_lock);
+
+ if (prot_bits & _PAGE_DIRTY)
+ mark_page_dirty_in_slot(kvm, memslot, gfn);
+
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return err;
+}
+
+int kvm_handle_mm_fault(struct kvm_vcpu *vcpu, unsigned long gpa, bool write, int ecode)
+{
+ int ret;
+
+ ret = kvm_map_page(vcpu, gpa, write);
+ if (ret)
+ return ret;
+
+ /* Invalidate this entry in the TLB */
+ if (!cpu_has_ptw || (ecode == EXCCODE_TLBM)) {
+ /*
+ * With HW PTW, invalid TLB is not added when page fault. But
+ * for EXCCODE_TLBM exception, stale TLB may exist because of
+ * the last read access.
+ *
+ * With SW PTW, invalid TLB is added in TLB refill exception.
+ */
+ vcpu->arch.flush_gpa = gpa;
+ kvm_make_request(KVM_REQ_TLB_FLUSH_GPA, vcpu);
+ }
+
+ return 0;
+}
+
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
+void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ kvm_flush_remote_tlbs(kvm);
+}
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
new file mode 100644
index 000000000000..f1768b7a6194
--- /dev/null
+++ b/arch/loongarch/kvm/switch.S
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/loongarch.h>
+#include <asm/regdef.h>
+#include <asm/unwind_hints.h>
+
+#define HGPR_OFFSET(x) (PT_R0 + 8*x)
+#define GGPR_OFFSET(x) (KVM_ARCH_GGPR + 8*x)
+
+.macro kvm_save_host_gpr base
+ .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, HGPR_OFFSET(\n)
+ .endr
+.endm
+
+.macro kvm_restore_host_gpr base
+ .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, HGPR_OFFSET(\n)
+ .endr
+.endm
+
+/*
+ * Save and restore all GPRs except base register,
+ * and default value of base register is a2.
+ */
+.macro kvm_save_guest_gprs base
+ .irp n,1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, GGPR_OFFSET(\n)
+ .endr
+.endm
+
+.macro kvm_restore_guest_gprs base
+ .irp n,1,2,3,4,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, GGPR_OFFSET(\n)
+ .endr
+.endm
+
+/*
+ * Prepare switch to guest, save host regs and restore guest regs.
+ * a2: kvm_vcpu_arch, don't touch it until 'ertn'
+ * t0, t1: temp register
+ */
+.macro kvm_switch_to_guest
+ /* Set host ECFG.VS=0, all exceptions share one exception entry */
+ csrrd t0, LOONGARCH_CSR_ECFG
+ bstrins.w t0, zero, CSR_ECFG_VS_SHIFT_END, CSR_ECFG_VS_SHIFT
+ csrwr t0, LOONGARCH_CSR_ECFG
+
+ /* Load up the new EENTRY */
+ ld.d t0, a2, KVM_ARCH_GEENTRY
+ csrwr t0, LOONGARCH_CSR_EENTRY
+
+ /* Set Guest ERA */
+ ld.d t0, a2, KVM_ARCH_GPC
+ csrwr t0, LOONGARCH_CSR_ERA
+
+ /* Load PGD for KVM hypervisor */
+ ld.d t0, a2, KVM_ARCH_KVMPGD
+ csrwr t0, LOONGARCH_CSR_PGDL
+
+ /* Mix GID and RID */
+ csrrd t1, LOONGARCH_CSR_GSTAT
+ bstrpick.w t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
+ csrrd t0, LOONGARCH_CSR_GTLBC
+ bstrins.w t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
+ csrwr t0, LOONGARCH_CSR_GTLBC
+
+ /*
+ * Enable intr in root mode with future ertn so that host interrupt
+ * can be responsed during VM runs
+ * Guest CRMD comes from separate GCSR_CRMD register
+ */
+ ori t0, zero, CSR_PRMD_PIE
+ csrwr t0, LOONGARCH_CSR_PRMD
+
+ /* Set PVM bit to setup ertn to guest context */
+ ori t0, zero, CSR_GSTAT_PVM
+ csrxchg t0, t0, LOONGARCH_CSR_GSTAT
+
+ /* Load Guest GPRs */
+ kvm_restore_guest_gprs a2
+ /* Load KVM_ARCH register */
+ ld.d a2, a2, (KVM_ARCH_GGPR + 8 * REG_A2)
+
+ ertn /* Switch to guest: GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0 */
+.endm
+
+ /*
+ * Exception entry for general exception from guest mode
+ * - IRQ is disabled
+ * - kernel privilege in root mode
+ * - page mode keep unchanged from previous PRMD in root mode
+ * - Fixme: tlb exception cannot happen since registers relative with TLB
+ * - is still in guest mode, such as pgd table/vmid registers etc,
+ * - will fix with hw page walk enabled in future
+ * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS
+ */
+ .text
+ .cfi_sections .debug_frame
+SYM_CODE_START(kvm_exc_entry)
+ UNWIND_HINT_UNDEFINED
+ csrwr a2, KVM_TEMP_KS
+ csrrd a2, KVM_VCPU_KS
+ addi.d a2, a2, KVM_VCPU_ARCH
+
+ /* After save GPRs, free to use any GPR */
+ kvm_save_guest_gprs a2
+ /* Save guest A2 */
+ csrrd t0, KVM_TEMP_KS
+ st.d t0, a2, (KVM_ARCH_GGPR + 8 * REG_A2)
+
+ /* A2 is kvm_vcpu_arch, A1 is free to use */
+ csrrd s1, KVM_VCPU_KS
+ ld.d s0, s1, KVM_VCPU_RUN
+
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, a2, KVM_ARCH_HESTAT
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, a2, KVM_ARCH_GPC
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, a2, KVM_ARCH_HBADV
+ csrrd t0, LOONGARCH_CSR_BADI
+ st.d t0, a2, KVM_ARCH_HBADI
+
+ /* Restore host ECFG.VS */
+ csrrd t0, LOONGARCH_CSR_ECFG
+ ld.d t1, a2, KVM_ARCH_HECFG
+ or t0, t0, t1
+ csrwr t0, LOONGARCH_CSR_ECFG
+
+ /* Restore host EENTRY */
+ ld.d t0, a2, KVM_ARCH_HEENTRY
+ csrwr t0, LOONGARCH_CSR_EENTRY
+
+ /* Restore host pgd table */
+ ld.d t0, a2, KVM_ARCH_HPGD
+ csrwr t0, LOONGARCH_CSR_PGDL
+
+ /*
+ * Disable PGM bit to enter root mode by default with next ertn
+ */
+ ori t0, zero, CSR_GSTAT_PVM
+ csrxchg zero, t0, LOONGARCH_CSR_GSTAT
+
+ /*
+ * Clear GTLBC.TGID field
+ * 0: for root tlb update in future tlb instr
+ * others: for guest tlb update like gpa to hpa in future tlb instr
+ */
+ csrrd t0, LOONGARCH_CSR_GTLBC
+ bstrins.w t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
+ csrwr t0, LOONGARCH_CSR_GTLBC
+ ld.d tp, a2, KVM_ARCH_HTP
+ ld.d sp, a2, KVM_ARCH_HSP
+ /* restore per cpu register */
+ ld.d u0, a2, KVM_ARCH_HPERCPU
+ addi.d sp, sp, -PT_SIZE
+
+ /* Prepare handle exception */
+ or a0, s0, zero
+ or a1, s1, zero
+ ld.d t8, a2, KVM_ARCH_HANDLE_EXIT
+ jirl ra, t8, 0
+
+ or a2, s1, zero
+ addi.d a2, a2, KVM_VCPU_ARCH
+
+ /* Resume host when ret <= 0 */
+ blez a0, ret_to_host
+
+ /*
+ * Return to guest
+ * Save per cpu register again, maybe switched to another cpu
+ */
+ st.d u0, a2, KVM_ARCH_HPERCPU
+
+ /* Save kvm_vcpu to kscratch */
+ csrwr s1, KVM_VCPU_KS
+ kvm_switch_to_guest
+
+ret_to_host:
+ ld.d a2, a2, KVM_ARCH_HSP
+ addi.d a2, a2, -PT_SIZE
+ kvm_restore_host_gpr a2
+ jr ra
+
+SYM_INNER_LABEL(kvm_exc_entry_end, SYM_L_LOCAL)
+SYM_CODE_END(kvm_exc_entry)
+
+/*
+ * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
+ *
+ * @register_param:
+ * a0: kvm_run* run
+ * a1: kvm_vcpu* vcpu
+ */
+SYM_FUNC_START(kvm_enter_guest)
+ /* Allocate space in stack bottom */
+ addi.d a2, sp, -PT_SIZE
+ /* Save host GPRs */
+ kvm_save_host_gpr a2
+
+ addi.d a2, a1, KVM_VCPU_ARCH
+ st.d sp, a2, KVM_ARCH_HSP
+ st.d tp, a2, KVM_ARCH_HTP
+ /* Save per cpu register */
+ st.d u0, a2, KVM_ARCH_HPERCPU
+
+ /* Save kvm_vcpu to kscratch */
+ csrwr a1, KVM_VCPU_KS
+ kvm_switch_to_guest
+SYM_INNER_LABEL(kvm_enter_guest_end, SYM_L_LOCAL)
+SYM_FUNC_END(kvm_enter_guest)
+
+SYM_FUNC_START(kvm_save_fpu)
+ fpu_save_csr a0 t1
+ fpu_save_double a0 t1
+ fpu_save_cc a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_save_fpu)
+
+SYM_FUNC_START(kvm_restore_fpu)
+ fpu_restore_double a0 t1
+ fpu_restore_csr a0 t1 t2
+ fpu_restore_cc a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_fpu)
+
+#ifdef CONFIG_CPU_HAS_LSX
+SYM_FUNC_START(kvm_save_lsx)
+ fpu_save_csr a0 t1
+ fpu_save_cc a0 t1 t2
+ lsx_save_data a0 t1
+ jr ra
+SYM_FUNC_END(kvm_save_lsx)
+
+SYM_FUNC_START(kvm_restore_lsx)
+ lsx_restore_data a0 t1
+ fpu_restore_cc a0 t1 t2
+ fpu_restore_csr a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_lsx)
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+SYM_FUNC_START(kvm_save_lasx)
+ fpu_save_csr a0 t1
+ fpu_save_cc a0 t1 t2
+ lasx_save_data a0 t1
+ jr ra
+SYM_FUNC_END(kvm_save_lasx)
+
+SYM_FUNC_START(kvm_restore_lasx)
+ lasx_restore_data a0 t1
+ fpu_restore_cc a0 t1 t2
+ fpu_restore_csr a0 t1 t2
+ jr ra
+SYM_FUNC_END(kvm_restore_lasx)
+#endif
+ .section ".rodata"
+SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry)
+SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
+
+#ifdef CONFIG_CPU_HAS_LBT
+STACK_FRAME_NON_STANDARD kvm_restore_fpu
+#ifdef CONFIG_CPU_HAS_LSX
+STACK_FRAME_NON_STANDARD kvm_restore_lsx
+#endif
+#ifdef CONFIG_CPU_HAS_LASX
+STACK_FRAME_NON_STANDARD kvm_restore_lasx
+#endif
+#endif
diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c
new file mode 100644
index 000000000000..32dc213374be
--- /dev/null
+++ b/arch/loongarch/kvm/timer.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_csr.h>
+#include <asm/kvm_vcpu.h>
+
+/*
+ * ktime_to_tick() - Scale ktime_t to timer tick value.
+ */
+static inline u64 ktime_to_tick(struct kvm_vcpu *vcpu, ktime_t now)
+{
+ u64 delta;
+
+ delta = ktime_to_ns(now);
+ return div_u64(delta * vcpu->arch.timer_mhz, MNSEC_PER_SEC);
+}
+
+static inline u64 tick_to_ns(struct kvm_vcpu *vcpu, u64 tick)
+{
+ return div_u64(tick * MNSEC_PER_SEC, vcpu->arch.timer_mhz);
+}
+
+/* Low level hrtimer wake routine */
+enum hrtimer_restart kvm_swtimer_wakeup(struct hrtimer *timer)
+{
+ struct kvm_vcpu *vcpu;
+
+ vcpu = container_of(timer, struct kvm_vcpu, arch.swtimer);
+ kvm_queue_irq(vcpu, INT_TI);
+ rcuwait_wake_up(&vcpu->wait);
+
+ return HRTIMER_NORESTART;
+}
+
+/*
+ * Initialise the timer to the specified frequency, zero it
+ */
+void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz)
+{
+ vcpu->arch.timer_mhz = timer_hz >> 20;
+
+ /* Starting at 0 */
+ kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TVAL, 0);
+}
+
+/*
+ * Restore soft timer state from saved context.
+ */
+void kvm_restore_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned long cfg, estat;
+ unsigned long ticks, delta, period;
+ ktime_t expire, now;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ /*
+ * Set guest stable timer cfg csr
+ * Disable timer before restore estat CSR register, avoid to
+ * get invalid timer interrupt for old timer cfg
+ */
+ cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
+
+ write_gcsr_timercfg(0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ if (!(cfg & CSR_TCFG_EN)) {
+ /* Guest timer is disabled, just restore timer registers */
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ return;
+ }
+
+ /*
+ * Freeze the soft-timer and sync the guest stable timer with it.
+ */
+ if (kvm_vcpu_is_blocking(vcpu))
+ hrtimer_cancel(&vcpu->arch.swtimer);
+
+ /*
+ * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
+ * If oneshot timer is fired, CSR TVAL will be -1, there are two
+ * conditions:
+ * 1) timer is fired during exiting to host
+ * 2) timer is fired and vm is doing timer irq, and then exiting to
+ * host. Host should not inject timer irq to avoid spurious
+ * timer interrupt again
+ */
+ ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) {
+ /*
+ * Writing 0 to LOONGARCH_CSR_TVAL will inject timer irq
+ * and set CSR TVAL with -1
+ */
+ write_gcsr_timertick(0);
+
+ /*
+ * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear
+ * timer interrupt, and CSR TVAL keeps unchanged with -1, it
+ * avoids spurious timer interrupt
+ */
+ if (!(estat & CPU_TIMER))
+ gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR);
+ return;
+ }
+
+ /*
+ * Set remainder tick value if not expired
+ */
+ delta = 0;
+ now = ktime_get();
+ expire = vcpu->arch.expire;
+ if (ktime_before(now, expire))
+ delta = ktime_to_tick(vcpu, ktime_sub(expire, now));
+ else if (cfg & CSR_TCFG_PERIOD) {
+ period = cfg & CSR_TCFG_VAL;
+ delta = ktime_to_tick(vcpu, ktime_sub(now, expire));
+ delta = period - (delta % period);
+
+ /*
+ * Inject timer here though sw timer should inject timer
+ * interrupt async already, since sw timer may be cancelled
+ * during injecting intr async
+ */
+ kvm_queue_irq(vcpu, INT_TI);
+ }
+
+ write_gcsr_timertick(delta);
+}
+
+/*
+ * Save guest timer state and switch to software emulation of guest
+ * timer. The hard timer must already be in use, so preemption should be
+ * disabled.
+ */
+static void _kvm_save_timer(struct kvm_vcpu *vcpu)
+{
+ unsigned long ticks, delta, cfg;
+ ktime_t expire;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL);
+
+ /*
+ * From LoongArch Reference Manual Volume 1 Chapter 7.6.2
+ * If period timer is fired, CSR TVAL will be reloaded from CSR TCFG
+ * If oneshot timer is fired, CSR TVAL will be -1
+ * Here judge one-shot timer fired by checking whether TVAL is larger
+ * than TCFG
+ */
+ if (ticks < cfg)
+ delta = tick_to_ns(vcpu, ticks);
+ else
+ delta = 0;
+
+ expire = ktime_add_ns(ktime_get(), delta);
+ vcpu->arch.expire = expire;
+ if (kvm_vcpu_is_blocking(vcpu)) {
+
+ /*
+ * HRTIMER_MODE_PINNED_HARD is suggested since vcpu may run in
+ * the same physical cpu in next time, and the timer should run
+ * in hardirq context even in the PREEMPT_RT case.
+ */
+ hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED_HARD);
+ }
+}
+
+/*
+ * Save guest timer state and switch to soft guest timer if hard timer was in
+ * use.
+ */
+void kvm_save_timer(struct kvm_vcpu *vcpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ preempt_disable();
+
+ /* Save hard timer state */
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL);
+ if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN)
+ _kvm_save_timer(vcpu);
+
+ /* Save timer-related state to vCPU context */
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT);
+ preempt_enable();
+}
diff --git a/arch/loongarch/kvm/tlb.c b/arch/loongarch/kvm/tlb.c
new file mode 100644
index 000000000000..ebdbe9264e9c
--- /dev/null
+++ b/arch/loongarch/kvm/tlb.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/tlb.h>
+#include <asm/kvm_csr.h>
+
+/*
+ * kvm_flush_tlb_all() - Flush all root TLB entries for guests.
+ *
+ * Invalidate all entries including GVA-->GPA and GPA-->HPA mappings.
+ */
+void kvm_flush_tlb_all(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ invtlb_all(INVTLB_ALLGID, 0, 0);
+ local_irq_restore(flags);
+}
+
+void kvm_flush_tlb_gpa(struct kvm_vcpu *vcpu, unsigned long gpa)
+{
+ lockdep_assert_irqs_disabled();
+ gpa &= (PAGE_MASK << 1);
+ invtlb(INVTLB_GID_ADDR, read_csr_gstat() & CSR_GSTAT_GID, gpa);
+}
diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h
new file mode 100644
index 000000000000..1783397b1bc8
--- /dev/null
+++ b/arch/loongarch/kvm/trace.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+#include <asm/kvm_csr.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for VM enters
+ */
+DECLARE_EVENT_CLASS(kvm_transition,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+ TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
+ __field(unsigned long, pc)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->pc = vcpu->arch.pc;
+ ),
+
+ TP_printk("vcpu %u PC: 0x%08lx", __entry->vcpu_id, __entry->pc)
+);
+
+DEFINE_EVENT(kvm_transition, kvm_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_reenter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+DEFINE_EVENT(kvm_transition, kvm_out,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu));
+
+/* Further exit reasons */
+#define KVM_TRACE_EXIT_IDLE 64
+#define KVM_TRACE_EXIT_CACHE 65
+
+/* Tracepoints for VM exits */
+#define kvm_trace_symbol_exit_types \
+ { KVM_TRACE_EXIT_IDLE, "IDLE" }, \
+ { KVM_TRACE_EXIT_CACHE, "CACHE" }
+
+DECLARE_EVENT_CLASS(kvm_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason),
+ TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
+ __field(unsigned long, pc)
+ __field(unsigned int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->pc = vcpu->arch.pc;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("vcpu %u [%s] PC: 0x%08lx",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->reason,
+ kvm_trace_symbol_exit_types),
+ __entry->pc)
+);
+
+DEFINE_EVENT(kvm_exit, kvm_exit_idle,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason));
+
+DEFINE_EVENT(kvm_exit, kvm_exit_cache,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason));
+
+DEFINE_EVENT(kvm_exit, kvm_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
+ TP_ARGS(vcpu, reason));
+
+TRACE_EVENT(kvm_exit_gspr,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int inst_word),
+ TP_ARGS(vcpu, inst_word),
+ TP_STRUCT__entry(
+ __field(unsigned int, vcpu_id)
+ __field(unsigned int, inst_word)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->inst_word = inst_word;
+ ),
+
+ TP_printk("vcpu %u Inst word: 0x%08x", __entry->vcpu_id,
+ __entry->inst_word)
+);
+
+#define KVM_TRACE_AUX_SAVE 0
+#define KVM_TRACE_AUX_RESTORE 1
+#define KVM_TRACE_AUX_ENABLE 2
+#define KVM_TRACE_AUX_DISABLE 3
+#define KVM_TRACE_AUX_DISCARD 4
+
+#define KVM_TRACE_AUX_FPU 1
+#define KVM_TRACE_AUX_LSX 2
+#define KVM_TRACE_AUX_LASX 3
+
+#define kvm_trace_symbol_aux_op \
+ { KVM_TRACE_AUX_SAVE, "save" }, \
+ { KVM_TRACE_AUX_RESTORE, "restore" }, \
+ { KVM_TRACE_AUX_ENABLE, "enable" }, \
+ { KVM_TRACE_AUX_DISABLE, "disable" }, \
+ { KVM_TRACE_AUX_DISCARD, "discard" }
+
+#define kvm_trace_symbol_aux_state \
+ { KVM_TRACE_AUX_FPU, "FPU" }, \
+ { KVM_TRACE_AUX_LSX, "LSX" }, \
+ { KVM_TRACE_AUX_LASX, "LASX" }
+
+TRACE_EVENT(kvm_aux,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op,
+ unsigned int state),
+ TP_ARGS(vcpu, op, state),
+ TP_STRUCT__entry(
+ __field(unsigned long, pc)
+ __field(u8, op)
+ __field(u8, state)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = vcpu->arch.pc;
+ __entry->op = op;
+ __entry->state = state;
+ ),
+
+ TP_printk("%s %s PC: 0x%08lx",
+ __print_symbolic(__entry->op,
+ kvm_trace_symbol_aux_op),
+ __print_symbolic(__entry->state,
+ kvm_trace_symbol_aux_state),
+ __entry->pc)
+);
+
+TRACE_EVENT(kvm_vpid_change,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned long vpid),
+ TP_ARGS(vcpu, vpid),
+ TP_STRUCT__entry(
+ __field(unsigned long, vpid)
+ ),
+
+ TP_fast_assign(
+ __entry->vpid = vpid;
+ ),
+
+ TP_printk("VPID: 0x%08lx", __entry->vpid)
+);
+
+#endif /* _TRACE_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/loongarch/kvm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
new file mode 100644
index 000000000000..5af32ec62cb1
--- /dev/null
+++ b/arch/loongarch/kvm/vcpu.c
@@ -0,0 +1,1815 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/entry-kvm.h>
+#include <asm/fpu.h>
+#include <asm/lbt.h>
+#include <asm/loongarch.h>
+#include <asm/setup.h>
+#include <asm/time.h>
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, int_exits),
+ STATS_DESC_COUNTER(VCPU, idle_exits),
+ STATS_DESC_COUNTER(VCPU, cpucfg_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, hypercall_exits)
+};
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
+};
+
+static inline void kvm_save_host_pmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_context *context;
+
+ context = this_cpu_ptr(vcpu->kvm->arch.vmcs);
+ context->perf_cntr[0] = read_csr_perfcntr0();
+ context->perf_cntr[1] = read_csr_perfcntr1();
+ context->perf_cntr[2] = read_csr_perfcntr2();
+ context->perf_cntr[3] = read_csr_perfcntr3();
+ context->perf_ctrl[0] = write_csr_perfctrl0(0);
+ context->perf_ctrl[1] = write_csr_perfctrl1(0);
+ context->perf_ctrl[2] = write_csr_perfctrl2(0);
+ context->perf_ctrl[3] = write_csr_perfctrl3(0);
+}
+
+static inline void kvm_restore_host_pmu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_context *context;
+
+ context = this_cpu_ptr(vcpu->kvm->arch.vmcs);
+ write_csr_perfcntr0(context->perf_cntr[0]);
+ write_csr_perfcntr1(context->perf_cntr[1]);
+ write_csr_perfcntr2(context->perf_cntr[2]);
+ write_csr_perfcntr3(context->perf_cntr[3]);
+ write_csr_perfctrl0(context->perf_ctrl[0]);
+ write_csr_perfctrl1(context->perf_ctrl[1]);
+ write_csr_perfctrl2(context->perf_ctrl[2]);
+ write_csr_perfctrl3(context->perf_ctrl[3]);
+}
+
+
+static inline void kvm_save_guest_pmu(struct kvm_vcpu *vcpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
+ kvm_read_clear_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+}
+
+static inline void kvm_restore_guest_pmu(struct kvm_vcpu *vcpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCNTR3);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+}
+
+static int kvm_own_pmu(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+
+ if (!kvm_guest_has_pmu(&vcpu->arch))
+ return -EINVAL;
+
+ kvm_save_host_pmu(vcpu);
+
+ /* Set PM0-PM(num) to guest */
+ val = read_csr_gcfg() & ~CSR_GCFG_GPERF;
+ val |= (kvm_get_pmu_num(&vcpu->arch) + 1) << CSR_GCFG_GPERF_SHIFT;
+ write_csr_gcfg(val);
+
+ kvm_restore_guest_pmu(vcpu);
+
+ return 0;
+}
+
+static void kvm_lose_pmu(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (!(vcpu->arch.aux_inuse & KVM_LARCH_PMU))
+ return;
+
+ kvm_save_guest_pmu(vcpu);
+
+ /* Disable pmu access from guest */
+ write_csr_gcfg(read_csr_gcfg() & ~CSR_GCFG_GPERF);
+
+ /*
+ * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when
+ * exiting the guest, so that the next time trap into the guest.
+ * We don't need to deal with PMU CSRs contexts.
+ */
+ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0);
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1);
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2);
+ val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+ if (!(val & KVM_PMU_EVENT_ENABLED))
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU;
+
+ kvm_restore_host_pmu(vcpu);
+}
+
+static void kvm_restore_pmu(struct kvm_vcpu *vcpu)
+{
+ if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU))
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+}
+
+static void kvm_check_pmu(struct kvm_vcpu *vcpu)
+{
+ if (kvm_check_request(KVM_REQ_PMU, vcpu)) {
+ kvm_own_pmu(vcpu);
+ vcpu->arch.aux_inuse |= KVM_LARCH_PMU;
+ }
+}
+
+static void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+ u32 version;
+ u64 steal;
+ gpa_t gpa;
+ struct kvm_memslots *slots;
+ struct kvm_steal_time __user *st;
+ struct gfn_to_hva_cache *ghc;
+
+ ghc = &vcpu->arch.st.cache;
+ gpa = vcpu->arch.st.guest_addr;
+ if (!(gpa & KVM_STEAL_PHYS_VALID))
+ return;
+
+ gpa &= KVM_STEAL_PHYS_MASK;
+ slots = kvm_memslots(vcpu->kvm);
+ if (slots->generation != ghc->generation || gpa != ghc->gpa) {
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st))) {
+ ghc->gpa = INVALID_GPA;
+ return;
+ }
+ }
+
+ st = (struct kvm_steal_time __user *)ghc->hva;
+ unsafe_get_user(version, &st->version, out);
+ if (version & 1)
+ version += 1; /* first time write, random junk */
+
+ version += 1;
+ unsafe_put_user(version, &st->version, out);
+ smp_wmb();
+
+ unsafe_get_user(steal, &st->steal, out);
+ steal += current->sched_info.run_delay - vcpu->arch.st.last_steal;
+ vcpu->arch.st.last_steal = current->sched_info.run_delay;
+ unsafe_put_user(steal, &st->steal, out);
+
+ smp_wmb();
+ version += 1;
+ unsafe_put_user(version, &st->version, out);
+out:
+ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+}
+
+/*
+ * kvm_check_requests - check and handle pending vCPU requests
+ *
+ * Return: RESUME_GUEST if we should enter the guest
+ * RESUME_HOST if we should exit to userspace
+ */
+static int kvm_check_requests(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_request_pending(vcpu))
+ return RESUME_GUEST;
+
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+ vcpu->arch.vpid = 0; /* Drop vpid for this vCPU */
+
+ if (kvm_dirty_ring_check_request(vcpu))
+ return RESUME_HOST;
+
+ if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
+ kvm_update_stolen_time(vcpu);
+
+ return RESUME_GUEST;
+}
+
+static void kvm_late_check_requests(struct kvm_vcpu *vcpu)
+{
+ lockdep_assert_irqs_disabled();
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH_GPA, vcpu))
+ if (vcpu->arch.flush_gpa != INVALID_GPA) {
+ kvm_flush_tlb_gpa(vcpu, vcpu->arch.flush_gpa);
+ vcpu->arch.flush_gpa = INVALID_GPA;
+ }
+}
+
+/*
+ * Check and handle pending signal and vCPU requests etc
+ * Run with irq enabled and preempt enabled
+ *
+ * Return: RESUME_GUEST if we should enter the guest
+ * RESUME_HOST if we should exit to userspace
+ * < 0 if we should exit to userspace, where the return value
+ * indicates an error
+ */
+static int kvm_enter_guest_check(struct kvm_vcpu *vcpu)
+{
+ int idx, ret;
+
+ /*
+ * Check conditions before entering the guest
+ */
+ ret = xfer_to_guest_mode_handle_work(vcpu);
+ if (ret < 0)
+ return ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_check_requests(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return ret;
+}
+
+/*
+ * Called with irq enabled
+ *
+ * Return: RESUME_GUEST if we should enter the guest, and irq disabled
+ * Others if we should exit to userspace
+ */
+static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ do {
+ ret = kvm_enter_guest_check(vcpu);
+ if (ret != RESUME_GUEST)
+ break;
+
+ /*
+ * Handle vcpu timer, interrupts, check requests and
+ * check vmid before vcpu enter guest
+ */
+ local_irq_disable();
+ kvm_deliver_intr(vcpu);
+ kvm_deliver_exception(vcpu);
+ /* Make sure the vcpu mode has been written */
+ smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+ kvm_check_vpid(vcpu);
+ kvm_check_pmu(vcpu);
+
+ /*
+ * Called after function kvm_check_vpid()
+ * Since it updates CSR.GSTAT used by kvm_flush_tlb_gpa(),
+ * and it may also clear KVM_REQ_TLB_FLUSH_GPA pending bit
+ */
+ kvm_late_check_requests(vcpu);
+ vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
+ /* Clear KVM_LARCH_SWCSR_LATEST as CSR will change when enter guest */
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST;
+
+ if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) {
+ kvm_lose_pmu(vcpu);
+ /* make sure the vcpu mode has been written */
+ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE);
+ local_irq_enable();
+ ret = -EAGAIN;
+ }
+ } while (ret != RESUME_GUEST);
+
+ return ret;
+}
+
+/*
+ * Return 1 for resume guest and "<= 0" for resume host.
+ */
+static int kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int ret = RESUME_GUEST;
+ unsigned long estat = vcpu->arch.host_estat;
+ u32 intr = estat & CSR_ESTAT_IS;
+ u32 ecode = (estat & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+
+ /* Set a default exit reason */
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+
+ kvm_lose_pmu(vcpu);
+
+ guest_timing_exit_irqoff();
+ guest_state_exit_irqoff();
+ local_irq_enable();
+
+ trace_kvm_exit(vcpu, ecode);
+ if (ecode) {
+ ret = kvm_handle_fault(vcpu, ecode);
+ } else {
+ WARN(!intr, "vm exiting with suspicious irq\n");
+ ++vcpu->stat.int_exits;
+ }
+
+ if (ret == RESUME_GUEST)
+ ret = kvm_pre_enter_guest(vcpu);
+
+ if (ret != RESUME_GUEST) {
+ local_irq_disable();
+ return ret;
+ }
+
+ guest_timing_enter_irqoff();
+ guest_state_enter_irqoff();
+ trace_kvm_reenter(vcpu);
+
+ return RESUME_GUEST;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+ return !!(vcpu->arch.irq_pending) &&
+ vcpu->arch.mp_state.mp_state == KVM_MP_STATE_RUNNABLE;
+}
+
+int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+
+ preempt_disable();
+ val = gcsr_read(LOONGARCH_CSR_CRMD);
+ preempt_enable();
+
+ return (val & CSR_PRMD_PPLV) == PLV_KERN;
+}
+
+#ifdef CONFIG_GUEST_PERF_EVENTS
+unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pc;
+}
+
+/*
+ * Returns true if a Performance Monitoring Interrupt (PMI), a.k.a. perf event,
+ * arrived in guest context. For LoongArch64, if PMU is not passthrough to VM,
+ * any event that arrives while a vCPU is loaded is considered to be "in guest".
+ */
+bool kvm_arch_pmi_in_guest(struct kvm_vcpu *vcpu)
+{
+ return (vcpu && !(vcpu->arch.aux_inuse & KVM_LARCH_PMU));
+}
+#endif
+
+bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
+{
+ return -EINVAL;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ /* Protect from TOD sync and vcpu_load/put() */
+ preempt_disable();
+ ret = kvm_pending_timer(vcpu) ||
+ kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT) & (1 << INT_TI);
+ preempt_enable();
+
+ return ret;
+}
+
+int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ kvm_debug("vCPU Register Dump:\n");
+ kvm_debug("\tPC = 0x%08lx\n", vcpu->arch.pc);
+ kvm_debug("\tExceptions: %08lx\n", vcpu->arch.irq_pending);
+
+ for (i = 0; i < 32; i += 4) {
+ kvm_debug("\tGPR%02d: %08lx %08lx %08lx %08lx\n", i,
+ vcpu->arch.gprs[i], vcpu->arch.gprs[i + 1],
+ vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]);
+ }
+
+ kvm_debug("\tCRMD: 0x%08lx, ESTAT: 0x%08lx\n",
+ kvm_read_hw_gcsr(LOONGARCH_CSR_CRMD),
+ kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT));
+
+ kvm_debug("\tERA: 0x%08lx\n", kvm_read_hw_gcsr(LOONGARCH_CSR_ERA));
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ *mp_state = vcpu->arch.mp_state;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ int ret = 0;
+
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.mp_state = *mp_state;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
+{
+ if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
+ return -EINVAL;
+
+ if (dbg->control & KVM_GUESTDBG_ENABLE)
+ vcpu->guest_debug = dbg->control;
+ else
+ vcpu->guest_debug = 0;
+
+ return 0;
+}
+
+static inline int kvm_set_cpuid(struct kvm_vcpu *vcpu, u64 val)
+{
+ int cpuid;
+ struct kvm_phyid_map *map;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (val >= KVM_MAX_PHYID)
+ return -EINVAL;
+
+ map = vcpu->kvm->arch.phyid_map;
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+
+ spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+ if ((cpuid < KVM_MAX_PHYID) && map->phys_map[cpuid].enabled) {
+ /* Discard duplicated CPUID set operation */
+ if (cpuid == val) {
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return 0;
+ }
+
+ /*
+ * CPUID is already set before
+ * Forbid changing to a different CPUID at runtime
+ */
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return -EINVAL;
+ }
+
+ if (map->phys_map[val].enabled) {
+ /* Discard duplicated CPUID set operation */
+ if (vcpu == map->phys_map[val].vcpu) {
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return 0;
+ }
+
+ /*
+ * New CPUID is already set with other vcpu
+ * Forbid sharing the same CPUID between different vcpus
+ */
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+ return -EINVAL;
+ }
+
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, val);
+ map->phys_map[val].enabled = true;
+ map->phys_map[val].vcpu = vcpu;
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+
+ return 0;
+}
+
+static inline void kvm_drop_cpuid(struct kvm_vcpu *vcpu)
+{
+ int cpuid;
+ struct kvm_phyid_map *map;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ map = vcpu->kvm->arch.phyid_map;
+ cpuid = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_CPUID);
+
+ if (cpuid >= KVM_MAX_PHYID)
+ return;
+
+ spin_lock(&vcpu->kvm->arch.phyid_map_lock);
+ if (map->phys_map[cpuid].enabled) {
+ map->phys_map[cpuid].vcpu = NULL;
+ map->phys_map[cpuid].enabled = false;
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID);
+ }
+ spin_unlock(&vcpu->kvm->arch.phyid_map_lock);
+}
+
+struct kvm_vcpu *kvm_get_vcpu_by_cpuid(struct kvm *kvm, int cpuid)
+{
+ struct kvm_phyid_map *map;
+
+ if (cpuid >= KVM_MAX_PHYID)
+ return NULL;
+
+ map = kvm->arch.phyid_map;
+ if (!map->phys_map[cpuid].enabled)
+ return NULL;
+
+ return map->phys_map[cpuid].vcpu;
+}
+
+static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val)
+{
+ unsigned long gintc;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(id) & INVALID_GCSR)
+ return -EINVAL;
+
+ if (id == LOONGARCH_CSR_ESTAT) {
+ preempt_disable();
+ vcpu_load(vcpu);
+ /*
+ * Sync pending interrupts into ESTAT so that interrupt
+ * remains during VM migration stage
+ */
+ kvm_deliver_intr(vcpu);
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST;
+ vcpu_put(vcpu);
+ preempt_enable();
+
+ /* ESTAT IP0~IP7 get from GINTC */
+ gintc = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_GINTC) & 0xff;
+ *val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT) | (gintc << 2);
+ return 0;
+ }
+
+ /*
+ * Get software CSR state since software state is consistent
+ * with hardware for synchronous ioctl
+ */
+ *val = kvm_read_sw_gcsr(csr, id);
+
+ return 0;
+}
+
+static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val)
+{
+ int ret = 0, gintc;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ if (get_gcsr_flag(id) & INVALID_GCSR)
+ return -EINVAL;
+
+ if (id == LOONGARCH_CSR_CPUID)
+ return kvm_set_cpuid(vcpu, val);
+
+ if (id == LOONGARCH_CSR_ESTAT) {
+ /* ESTAT IP0~IP7 inject through GINTC */
+ gintc = (val >> 2) & 0xff;
+ kvm_set_sw_gcsr(csr, LOONGARCH_CSR_GINTC, gintc);
+
+ gintc = val & ~(0xffUL << 2);
+ kvm_set_sw_gcsr(csr, LOONGARCH_CSR_ESTAT, gintc);
+
+ return ret;
+ }
+
+ kvm_write_sw_gcsr(csr, id, val);
+
+ /*
+ * After modifying the PMU CSR register value of the vcpu.
+ * If the PMU CSRs are used, we need to set KVM_REQ_PMU.
+ */
+ if (id >= LOONGARCH_CSR_PERFCTRL0 && id <= LOONGARCH_CSR_PERFCNTR3) {
+ unsigned long val;
+
+ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0) |
+ kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1) |
+ kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL2) |
+ kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3);
+
+ if (val & KVM_PMU_EVENT_ENABLED)
+ kvm_make_request(KVM_REQ_PMU, vcpu);
+ }
+
+ return ret;
+}
+
+static int _kvm_get_cpucfg_mask(int id, u64 *v)
+{
+ if (id < 0 || id >= KVM_MAX_CPUCFG_REGS)
+ return -EINVAL;
+
+ switch (id) {
+ case LOONGARCH_CPUCFG0:
+ *v = GENMASK(31, 0);
+ return 0;
+ case LOONGARCH_CPUCFG1:
+ /* CPUCFG1_MSGINT is not supported by KVM */
+ *v = GENMASK(25, 0);
+ return 0;
+ case LOONGARCH_CPUCFG2:
+ /* CPUCFG2 features unconditionally supported by KVM */
+ *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP |
+ CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV |
+ CPUCFG2_LSPW | CPUCFG2_LAM;
+ /*
+ * For the ISA extensions listed below, if one is supported
+ * by the host, then it is also supported by KVM.
+ */
+ if (cpu_has_lsx)
+ *v |= CPUCFG2_LSX;
+ if (cpu_has_lasx)
+ *v |= CPUCFG2_LASX;
+ if (cpu_has_lbt_x86)
+ *v |= CPUCFG2_X86BT;
+ if (cpu_has_lbt_arm)
+ *v |= CPUCFG2_ARMBT;
+ if (cpu_has_lbt_mips)
+ *v |= CPUCFG2_MIPSBT;
+
+ return 0;
+ case LOONGARCH_CPUCFG3:
+ *v = GENMASK(16, 0);
+ return 0;
+ case LOONGARCH_CPUCFG4:
+ case LOONGARCH_CPUCFG5:
+ *v = GENMASK(31, 0);
+ return 0;
+ case LOONGARCH_CPUCFG6:
+ if (cpu_has_pmp)
+ *v = GENMASK(14, 0);
+ else
+ *v = 0;
+ return 0;
+ case LOONGARCH_CPUCFG16:
+ *v = GENMASK(16, 0);
+ return 0;
+ case LOONGARCH_CPUCFG17 ... LOONGARCH_CPUCFG20:
+ *v = GENMASK(30, 0);
+ return 0;
+ default:
+ /*
+ * CPUCFG bits should be zero if reserved by HW or not
+ * supported by KVM.
+ */
+ *v = 0;
+ return 0;
+ }
+}
+
+static int kvm_check_cpucfg(int id, u64 val)
+{
+ int ret;
+ u64 mask = 0;
+
+ ret = _kvm_get_cpucfg_mask(id, &mask);
+ if (ret)
+ return ret;
+
+ if (val & ~mask)
+ /* Unsupported features and/or the higher 32 bits should not be set */
+ return -EINVAL;
+
+ switch (id) {
+ case LOONGARCH_CPUCFG2:
+ if (!(val & CPUCFG2_LLFTP))
+ /* Guests must have a constant timer */
+ return -EINVAL;
+ if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP)))
+ /* Single and double float point must both be set when FP is enabled */
+ return -EINVAL;
+ if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP))
+ /* LSX architecturally implies FP but val does not satisfy that */
+ return -EINVAL;
+ if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX))
+ /* LASX architecturally implies LSX and FP but val does not satisfy that */
+ return -EINVAL;
+ return 0;
+ case LOONGARCH_CPUCFG6:
+ if (val & CPUCFG6_PMP) {
+ u32 host = read_cpucfg(LOONGARCH_CPUCFG6);
+ if ((val & CPUCFG6_PMBITS) != (host & CPUCFG6_PMBITS))
+ return -EINVAL;
+ if ((val & CPUCFG6_PMNUM) > (host & CPUCFG6_PMNUM))
+ return -EINVAL;
+ if ((val & CPUCFG6_UPM) && !(host & CPUCFG6_UPM))
+ return -EINVAL;
+ }
+ return 0;
+ default:
+ /*
+ * Values for the other CPUCFG IDs are not being further validated
+ * besides the mask check above.
+ */
+ return 0;
+ }
+}
+
+static int kvm_get_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg, u64 *v)
+{
+ int id, ret = 0;
+ u64 type = reg->id & KVM_REG_LOONGARCH_MASK;
+
+ switch (type) {
+ case KVM_REG_LOONGARCH_CSR:
+ id = KVM_GET_IOC_CSR_IDX(reg->id);
+ ret = _kvm_getcsr(vcpu, id, v);
+ break;
+ case KVM_REG_LOONGARCH_CPUCFG:
+ id = KVM_GET_IOC_CPUCFG_IDX(reg->id);
+ if (id >= 0 && id < KVM_MAX_CPUCFG_REGS)
+ *v = vcpu->arch.cpucfg[id];
+ else
+ ret = -EINVAL;
+ break;
+ case KVM_REG_LOONGARCH_LBT:
+ if (!kvm_guest_has_lbt(&vcpu->arch))
+ return -ENXIO;
+
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_LBT_SCR0:
+ *v = vcpu->arch.lbt.scr0;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR1:
+ *v = vcpu->arch.lbt.scr1;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR2:
+ *v = vcpu->arch.lbt.scr2;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR3:
+ *v = vcpu->arch.lbt.scr3;
+ break;
+ case KVM_REG_LOONGARCH_LBT_EFLAGS:
+ *v = vcpu->arch.lbt.eflags;
+ break;
+ case KVM_REG_LOONGARCH_LBT_FTOP:
+ *v = vcpu->arch.fpu.ftop;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ case KVM_REG_LOONGARCH_KVM:
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_COUNTER:
+ *v = drdtime() + vcpu->kvm->arch.time_offset;
+ break;
+ case KVM_REG_LOONGARCH_DEBUG_INST:
+ *v = INSN_HVCL | KVM_HCALL_SWDBG;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ int ret = 0;
+ u64 v, size = reg->id & KVM_REG_SIZE_MASK;
+
+ switch (size) {
+ case KVM_REG_SIZE_U64:
+ ret = kvm_get_one_reg(vcpu, reg, &v);
+ if (ret)
+ return ret;
+ ret = put_user(v, (u64 __user *)(long)reg->addr);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_set_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg, u64 v)
+{
+ int id, ret = 0;
+ u64 type = reg->id & KVM_REG_LOONGARCH_MASK;
+
+ switch (type) {
+ case KVM_REG_LOONGARCH_CSR:
+ id = KVM_GET_IOC_CSR_IDX(reg->id);
+ ret = _kvm_setcsr(vcpu, id, v);
+ break;
+ case KVM_REG_LOONGARCH_CPUCFG:
+ id = KVM_GET_IOC_CPUCFG_IDX(reg->id);
+ ret = kvm_check_cpucfg(id, v);
+ if (ret)
+ break;
+ vcpu->arch.cpucfg[id] = (u32)v;
+ if (id == LOONGARCH_CPUCFG6)
+ vcpu->arch.max_pmu_csrid =
+ LOONGARCH_CSR_PERFCTRL0 + 2 * kvm_get_pmu_num(&vcpu->arch) + 1;
+ break;
+ case KVM_REG_LOONGARCH_LBT:
+ if (!kvm_guest_has_lbt(&vcpu->arch))
+ return -ENXIO;
+
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_LBT_SCR0:
+ vcpu->arch.lbt.scr0 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR1:
+ vcpu->arch.lbt.scr1 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR2:
+ vcpu->arch.lbt.scr2 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_SCR3:
+ vcpu->arch.lbt.scr3 = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_EFLAGS:
+ vcpu->arch.lbt.eflags = v;
+ break;
+ case KVM_REG_LOONGARCH_LBT_FTOP:
+ vcpu->arch.fpu.ftop = v;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ case KVM_REG_LOONGARCH_KVM:
+ switch (reg->id) {
+ case KVM_REG_LOONGARCH_COUNTER:
+ /*
+ * gftoffset is relative with board, not vcpu
+ * only set for the first time for smp system
+ */
+ if (vcpu->vcpu_id == 0)
+ vcpu->kvm->arch.time_offset = (signed long)(v - drdtime());
+ break;
+ case KVM_REG_LOONGARCH_VCPU_RESET:
+ vcpu->arch.st.guest_addr = 0;
+ memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending));
+ memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear));
+
+ /*
+ * When vCPU reset, clear the ESTAT and GINTC registers
+ * Other CSR registers are cleared with function _kvm_setcsr().
+ */
+ kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_GINTC, 0);
+ kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_ESTAT, 0);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ int ret = 0;
+ u64 v, size = reg->id & KVM_REG_SIZE_MASK;
+
+ switch (size) {
+ case KVM_REG_SIZE_U64:
+ ret = get_user(v, (u64 __user *)(long)reg->addr);
+ if (ret)
+ return ret;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return kvm_set_one_reg(vcpu, reg, v);
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
+ regs->gpr[i] = vcpu->arch.gprs[i];
+
+ regs->pc = vcpu->arch.pc;
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ int i;
+
+ for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
+ vcpu->arch.gprs[i] = regs->gpr[i];
+
+ vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */
+ vcpu->arch.pc = regs->pc;
+
+ return 0;
+}
+
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+ struct kvm_enable_cap *cap)
+{
+ /* FPU is enabled by default, will support LSX/LASX later. */
+ return -EINVAL;
+}
+
+static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case LOONGARCH_CPUCFG2:
+ case LOONGARCH_CPUCFG6:
+ return 0;
+ case CPUCFG_KVM_FEATURE:
+ return 0;
+ default:
+ return -ENXIO;
+ }
+
+ return -ENXIO;
+}
+
+static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
+ || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+ return -ENXIO;
+
+ return 0;
+}
+
+static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
+ break;
+ case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+ ret = kvm_loongarch_pvtime_has_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_loongarch_cpucfg_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = 0;
+ uint64_t val;
+ uint64_t __user *uaddr = (uint64_t __user *)attr->addr;
+
+ switch (attr->attr) {
+ case 0 ... (KVM_MAX_CPUCFG_REGS - 1):
+ ret = _kvm_get_cpucfg_mask(attr->attr, &val);
+ if (ret)
+ return ret;
+ break;
+ case CPUCFG_KVM_FEATURE:
+ val = vcpu->kvm->arch.pv_features & LOONGARCH_PV_FEAT_MASK;
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ put_user(val, uaddr);
+
+ return ret;
+}
+
+static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 gpa;
+ u64 __user *user = (u64 __user *)attr->addr;
+
+ if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
+ || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+ return -ENXIO;
+
+ gpa = vcpu->arch.st.guest_addr;
+ if (put_user(gpa, user))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_cpucfg_get_attr(vcpu, attr);
+ break;
+ case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+ ret = kvm_loongarch_pvtime_get_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 val, valid;
+ u64 __user *user = (u64 __user *)attr->addr;
+ struct kvm *kvm = vcpu->kvm;
+
+ switch (attr->attr) {
+ case CPUCFG_KVM_FEATURE:
+ if (get_user(val, user))
+ return -EFAULT;
+
+ valid = LOONGARCH_PV_FEAT_MASK;
+ if (val & ~valid)
+ return -EINVAL;
+
+ /* All vCPUs need set the same PV features */
+ if ((kvm->arch.pv_features & LOONGARCH_PV_FEAT_UPDATED)
+ && ((kvm->arch.pv_features & valid) != val))
+ return -EINVAL;
+ kvm->arch.pv_features = val | LOONGARCH_PV_FEAT_UPDATED;
+ return 0;
+ default:
+ return -ENXIO;
+ }
+}
+
+static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int idx, ret = 0;
+ u64 gpa, __user *user = (u64 __user *)attr->addr;
+ struct kvm *kvm = vcpu->kvm;
+
+ if (!kvm_guest_has_pv_feature(vcpu, KVM_FEATURE_STEAL_TIME)
+ || attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+ return -ENXIO;
+
+ if (get_user(gpa, user))
+ return -EFAULT;
+
+ if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
+ return -EINVAL;
+
+ if (!(gpa & KVM_STEAL_PHYS_VALID)) {
+ vcpu->arch.st.guest_addr = gpa;
+ return 0;
+ }
+
+ /* Check the address is in a valid memslot */
+ idx = srcu_read_lock(&kvm->srcu);
+ if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT)))
+ ret = -EINVAL;
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (!ret) {
+ vcpu->arch.st.guest_addr = gpa;
+ vcpu->arch.st.last_steal = current->sched_info.run_delay;
+ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+ }
+
+ return ret;
+}
+
+static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ int ret = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_LOONGARCH_VCPU_CPUCFG:
+ ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
+ break;
+ case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+ ret = kvm_loongarch_pvtime_set_attr(vcpu, attr);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ long r;
+ struct kvm_device_attr attr;
+ void __user *argp = (void __user *)arg;
+ struct kvm_vcpu *vcpu = filp->private_data;
+
+ /*
+ * Only software CSR should be modified
+ *
+ * If any hardware CSR register is modified, vcpu_load/vcpu_put pair
+ * should be used. Since CSR registers owns by this vcpu, if switch
+ * to other vcpus, other vcpus need reload CSR registers.
+ *
+ * If software CSR is modified, bit KVM_LARCH_HWCSR_USABLE should
+ * be clear in vcpu->arch.aux_inuse, and vcpu_load will check
+ * aux_inuse flag and reload CSR registers form software.
+ */
+
+ switch (ioctl) {
+ case KVM_SET_ONE_REG:
+ case KVM_GET_ONE_REG: {
+ struct kvm_one_reg reg;
+
+ r = -EFAULT;
+ if (copy_from_user(&reg, argp, sizeof(reg)))
+ break;
+ if (ioctl == KVM_SET_ONE_REG) {
+ r = kvm_set_reg(vcpu, &reg);
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_HWCSR_USABLE;
+ } else
+ r = kvm_get_reg(vcpu, &reg);
+ break;
+ }
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ break;
+ }
+ case KVM_HAS_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_has_attr(vcpu, &attr);
+ break;
+ }
+ case KVM_GET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_get_attr(vcpu, &attr);
+ break;
+ }
+ case KVM_SET_DEVICE_ATTR: {
+ r = -EFAULT;
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ break;
+ r = kvm_loongarch_vcpu_set_attr(vcpu, &attr);
+ break;
+ }
+ default:
+ r = -ENOIOCTLCMD;
+ break;
+ }
+
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ int i = 0;
+
+ fpu->fcc = vcpu->arch.fpu.fcc;
+ fpu->fcsr = vcpu->arch.fpu.fcsr;
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&fpu->fpr[i], &vcpu->arch.fpu.fpr[i], FPU_REG_WIDTH / 64);
+
+ return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ int i = 0;
+
+ vcpu->arch.fpu.fcc = fpu->fcc;
+ vcpu->arch.fpu.fcsr = fpu->fcsr;
+ for (i = 0; i < NUM_FPU_REGS; i++)
+ memcpy(&vcpu->arch.fpu.fpr[i], &fpu->fpr[i], FPU_REG_WIDTH / 64);
+
+ return 0;
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+int kvm_own_lbt(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_lbt(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+ set_csr_euen(CSR_EUEN_LBTEN);
+ _restore_lbt(&vcpu->arch.lbt);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LBT;
+ preempt_enable();
+
+ return 0;
+}
+
+static void kvm_lose_lbt(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ if (vcpu->arch.aux_inuse & KVM_LARCH_LBT) {
+ _save_lbt(&vcpu->arch.lbt);
+ clear_csr_euen(CSR_EUEN_LBTEN);
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_LBT;
+ }
+ preempt_enable();
+}
+
+static void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr)
+{
+ /*
+ * If TM is enabled, top register save/restore will
+ * cause lbt exception, here enable lbt in advance
+ */
+ if (fcsr & FPU_CSR_TM)
+ kvm_own_lbt(vcpu);
+}
+
+static void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ if (vcpu->arch.aux_inuse & KVM_LARCH_LBT)
+ return;
+ kvm_check_fcsr(vcpu, read_fcsr(LOONGARCH_FCSR0));
+ }
+}
+#else
+static inline void kvm_lose_lbt(struct kvm_vcpu *vcpu) { }
+static inline void kvm_check_fcsr(struct kvm_vcpu *vcpu, unsigned long fcsr) { }
+static inline void kvm_check_fcsr_alive(struct kvm_vcpu *vcpu) { }
+#endif
+
+/* Enable FPU and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+
+ /*
+ * Enable FPU for guest
+ * Set FR and FRE according to guest context
+ */
+ kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
+ set_csr_euen(CSR_EUEN_FPEN);
+
+ kvm_restore_fpu(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse |= KVM_LARCH_FPU;
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_FPU);
+
+ preempt_enable();
+}
+
+#ifdef CONFIG_CPU_HAS_LSX
+/* Enable LSX and restore context */
+int kvm_own_lsx(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+
+ /* Enable LSX for guest */
+ kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
+ set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN);
+ switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ case KVM_LARCH_FPU:
+ /*
+ * Guest FPU state already loaded,
+ * only restore upper LSX state
+ */
+ _restore_lsx_upper(&vcpu->arch.fpu);
+ break;
+ default:
+ /* Neither FP or LSX already active,
+ * restore full LSX state
+ */
+ kvm_restore_lsx(&vcpu->arch.fpu);
+ break;
+ }
+
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU;
+ preempt_enable();
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+/* Enable LASX and restore context */
+int kvm_own_lasx(struct kvm_vcpu *vcpu)
+{
+ if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch) || !kvm_guest_has_lasx(&vcpu->arch))
+ return -EINVAL;
+
+ preempt_disable();
+
+ kvm_check_fcsr(vcpu, vcpu->arch.fpu.fcsr);
+ set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
+ switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) {
+ case KVM_LARCH_LSX:
+ case KVM_LARCH_LSX | KVM_LARCH_FPU:
+ /* Guest LSX state already loaded, only restore upper LASX state */
+ _restore_lasx_upper(&vcpu->arch.fpu);
+ break;
+ case KVM_LARCH_FPU:
+ /* Guest FP state already loaded, only restore upper LSX & LASX state */
+ _restore_lsx_upper(&vcpu->arch.fpu);
+ _restore_lasx_upper(&vcpu->arch.fpu);
+ break;
+ default:
+ /* Neither FP or LSX already active, restore full LASX state */
+ kvm_restore_lasx(&vcpu->arch.fpu);
+ break;
+ }
+
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX);
+ vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU;
+ preempt_enable();
+
+ return 0;
+}
+#endif
+
+/* Save context and disable FPU */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+
+ kvm_check_fcsr_alive(vcpu);
+ if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) {
+ kvm_save_lasx(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX);
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LASX);
+
+ /* Disable LASX & LSX & FPU */
+ clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN);
+ } else if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) {
+ kvm_save_lsx(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU);
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LSX);
+
+ /* Disable LSX & FPU */
+ clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN);
+ } else if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) {
+ kvm_save_fpu(&vcpu->arch.fpu);
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_FPU;
+ trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU);
+
+ /* Disable FPU */
+ clear_csr_euen(CSR_EUEN_FPEN);
+ }
+ kvm_lose_lbt(vcpu);
+
+ preempt_enable();
+}
+
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
+{
+ int intr = (int)irq->irq;
+
+ if (intr > 0)
+ kvm_queue_irq(vcpu, intr);
+ else if (intr < 0)
+ kvm_dequeue_irq(vcpu, -intr);
+ else {
+ kvm_err("%s: invalid interrupt ioctl %d\n", __func__, irq->irq);
+ return -EINVAL;
+ }
+
+ kvm_vcpu_kick(vcpu);
+
+ return 0;
+}
+
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct kvm_vcpu *vcpu = filp->private_data;
+
+ if (ioctl == KVM_INTERRUPT) {
+ struct kvm_interrupt irq;
+
+ if (copy_from_user(&irq, argp, sizeof(irq)))
+ return -EFAULT;
+
+ kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, irq.irq);
+
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+ }
+
+ return -ENOIOCTLCMD;
+}
+
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ unsigned long timer_hz;
+ struct loongarch_csrs *csr;
+
+ vcpu->arch.vpid = 0;
+ vcpu->arch.flush_gpa = INVALID_GPA;
+
+ hrtimer_setup(&vcpu->arch.swtimer, kvm_swtimer_wakeup, CLOCK_MONOTONIC,
+ HRTIMER_MODE_ABS_PINNED_HARD);
+
+ /* Get GPA (=HVA) of PGD for kvm hypervisor */
+ vcpu->arch.kvm_pgd = __pa(vcpu->kvm->arch.pgd);
+
+ /*
+ * Get PGD for primary mmu, virtual address is used since there is
+ * memory access after loading from CSR_PGD in tlb exception fast path.
+ */
+ vcpu->arch.host_pgd = (unsigned long)vcpu->kvm->mm->pgd;
+
+ vcpu->arch.handle_exit = kvm_handle_exit;
+ vcpu->arch.guest_eentry = (unsigned long)kvm_loongarch_ops->exc_entry;
+ vcpu->arch.csr = kzalloc(sizeof(struct loongarch_csrs), GFP_KERNEL);
+ if (!vcpu->arch.csr)
+ return -ENOMEM;
+
+ /*
+ * All kvm exceptions share one exception entry, and host <-> guest
+ * switch also switch ECFG.VS field, keep host ECFG.VS info here.
+ */
+ vcpu->arch.host_ecfg = (read_csr_ecfg() & CSR_ECFG_VS);
+
+ /* Init */
+ vcpu->arch.last_sched_cpu = -1;
+
+ /* Init ipi_state lock */
+ spin_lock_init(&vcpu->arch.ipi_state.lock);
+
+ /*
+ * Initialize guest register state to valid architectural reset state.
+ */
+ timer_hz = calc_const_freq();
+ kvm_init_timer(vcpu, timer_hz);
+
+ /* Set Initialize mode for guest */
+ csr = vcpu->arch.csr;
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CRMD, CSR_CRMD_DA);
+
+ /* Set cpuid */
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_TMID, vcpu->vcpu_id);
+ kvm_write_sw_gcsr(csr, LOONGARCH_CSR_CPUID, KVM_MAX_PHYID);
+
+ /* Start with no pending virtual guest interrupts */
+ csr->csrs[LOONGARCH_CSR_GINTC] = 0;
+
+ return 0;
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ struct kvm_context *context;
+
+ hrtimer_cancel(&vcpu->arch.swtimer);
+ kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+ kvm_drop_cpuid(vcpu);
+ kfree(vcpu->arch.csr);
+
+ /*
+ * If the vCPU is freed and reused as another vCPU, we don't want the
+ * matching pointer wrongly hanging around in last_vcpu.
+ */
+ for_each_possible_cpu(cpu) {
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ if (context->last_vcpu == vcpu)
+ context->last_vcpu = NULL;
+ }
+}
+
+static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ bool migrated;
+ struct kvm_context *context;
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ /*
+ * Have we migrated to a different CPU?
+ * If so, any old guest TLB state may be stale.
+ */
+ migrated = (vcpu->arch.last_sched_cpu != cpu);
+
+ /*
+ * Was this the last vCPU to run on this CPU?
+ * If not, any old guest state from this vCPU will have been clobbered.
+ */
+ context = per_cpu_ptr(vcpu->kvm->arch.vmcs, cpu);
+ if (migrated || (context->last_vcpu != vcpu))
+ vcpu->arch.aux_inuse &= ~KVM_LARCH_HWCSR_USABLE;
+ context->last_vcpu = vcpu;
+
+ /* Restore timer state regardless */
+ kvm_restore_timer(vcpu);
+ kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+
+ /* Restore hardware PMU CSRs */
+ kvm_restore_pmu(vcpu);
+
+ /* Don't bother restoring registers multiple times unless necessary */
+ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
+ return 0;
+
+ write_csr_gcntc((ulong)vcpu->kvm->arch.time_offset);
+
+ /* Restore guest CSR registers */
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_CRMD);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PRMD);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_EUEN);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_MISC);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ECFG);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ERA);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_BADV);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_BADI);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_EENTRY);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBIDX);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBEHI);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBELO0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBELO1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ASID);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PGDL);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PGDH);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PWCTL0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_PWCTL1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_STLBPGSIZE);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_RVACFG);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_CPUID);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS3);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS4);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS5);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS6);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_KS7);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TMID);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_CNTC);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRENTRY);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRBADV);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRERA);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRSAVE);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBREHI);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TLBRPRMD);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN0);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN1);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN2);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_DMWIN3);
+ kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_LLBCTL);
+
+ /* Restore Root.GINTC from unused Guest.GINTC register */
+ write_csr_gintc(csr->csrs[LOONGARCH_CSR_GINTC]);
+
+ /*
+ * We should clear linked load bit to break interrupted atomics. This
+ * prevents a SC on the next vCPU from succeeding by matching a LL on
+ * the previous vCPU.
+ */
+ if (vcpu->kvm->created_vcpus > 1)
+ set_gcsr_llbctl(CSR_LLBCTL_WCLLB);
+
+ vcpu->arch.aux_inuse |= KVM_LARCH_HWCSR_USABLE;
+
+ return 0;
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ /* Restore guest state to registers */
+ _kvm_vcpu_load(vcpu, cpu);
+ local_irq_restore(flags);
+}
+
+static int _kvm_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct loongarch_csrs *csr = vcpu->arch.csr;
+
+ kvm_lose_fpu(vcpu);
+
+ /*
+ * Update CSR state from hardware if software CSR state is stale,
+ * most CSR registers are kept unchanged during process context
+ * switch except CSR registers like remaining timer tick value and
+ * injected interrupt state.
+ */
+ if (vcpu->arch.aux_inuse & KVM_LARCH_SWCSR_LATEST)
+ goto out;
+
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_CRMD);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRMD);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_EUEN);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_MISC);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ECFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ERA);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_BADV);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_BADI);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_EENTRY);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBIDX);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBEHI);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBELO0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBELO1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ASID);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PGDL);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PGDH);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PWCTL0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PWCTL1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_STLBPGSIZE);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_RVACFG);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_CPUID);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRCFG1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRCFG2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_PRCFG3);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS3);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS4);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS5);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS6);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_KS7);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TMID);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_CNTC);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_LLBCTL);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRENTRY);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRBADV);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRERA);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRSAVE);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRELO1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBREHI);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TLBRPRMD);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN0);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN1);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN2);
+ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_DMWIN3);
+
+ vcpu->arch.aux_inuse |= KVM_LARCH_SWCSR_LATEST;
+
+out:
+ kvm_save_timer(vcpu);
+ /* Save Root.GINTC into unused Guest.GINTC register */
+ csr->csrs[LOONGARCH_CSR_GINTC] = read_csr_gintc();
+
+ return 0;
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+ vcpu->arch.last_sched_cpu = cpu;
+
+ /* Save guest state in registers */
+ _kvm_vcpu_put(vcpu, cpu);
+ local_irq_restore(flags);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+ int r = -EINTR;
+ struct kvm_run *run = vcpu->run;
+
+ if (vcpu->mmio_needed) {
+ if (!vcpu->mmio_is_write)
+ kvm_complete_mmio_read(vcpu, run);
+ vcpu->mmio_needed = 0;
+ }
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_HYPERCALL:
+ kvm_complete_user_service(vcpu, run);
+ break;
+ case KVM_EXIT_LOONGARCH_IOCSR:
+ if (!run->iocsr_io.is_write)
+ kvm_complete_iocsr_read(vcpu, run);
+ break;
+ }
+
+ if (!vcpu->wants_to_run)
+ return r;
+
+ /* Clear exit_reason */
+ run->exit_reason = KVM_EXIT_UNKNOWN;
+ lose_fpu(1);
+ vcpu_load(vcpu);
+ kvm_sigset_activate(vcpu);
+ r = kvm_pre_enter_guest(vcpu);
+ if (r != RESUME_GUEST)
+ goto out;
+
+ guest_timing_enter_irqoff();
+ guest_state_enter_irqoff();
+ trace_kvm_enter(vcpu);
+ r = kvm_loongarch_ops->enter_guest(run, vcpu);
+
+ trace_kvm_out(vcpu);
+ /*
+ * Guest exit is already recorded at kvm_handle_exit()
+ * return value must not be RESUME_GUEST
+ */
+ local_irq_enable();
+out:
+ kvm_sigset_deactivate(vcpu);
+ vcpu_put(vcpu);
+
+ return r;
+}
diff --git a/arch/loongarch/kvm/vm.c b/arch/loongarch/kvm/vm.c
new file mode 100644
index 000000000000..edccfc8c9cd8
--- /dev/null
+++ b/arch/loongarch/kvm/vm.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_vcpu.h>
+#include <asm/kvm_eiointc.h>
+#include <asm/kvm_pch_pic.h>
+
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_ICOUNTER(VM, pages),
+ STATS_DESC_ICOUNTER(VM, hugepages),
+};
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
+{
+ int i;
+
+ /* Allocate page table to map GPA -> RPA */
+ kvm->arch.pgd = kvm_pgd_alloc();
+ if (!kvm->arch.pgd)
+ return -ENOMEM;
+
+ kvm->arch.phyid_map = kvzalloc(sizeof(struct kvm_phyid_map), GFP_KERNEL_ACCOUNT);
+ if (!kvm->arch.phyid_map) {
+ free_page((unsigned long)kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+ return -ENOMEM;
+ }
+ spin_lock_init(&kvm->arch.phyid_map_lock);
+
+ kvm_init_vmcs(kvm);
+
+ /* Enable all PV features by default */
+ kvm->arch.pv_features = BIT(KVM_FEATURE_IPI);
+ if (kvm_pvtime_supported())
+ kvm->arch.pv_features |= BIT(KVM_FEATURE_STEAL_TIME);
+
+ /*
+ * cpu_vabits means user address space only (a half of total).
+ * GPA size of VM is the same with the size of user address space.
+ */
+ kvm->arch.gpa_size = BIT(cpu_vabits);
+ kvm->arch.root_level = CONFIG_PGTABLE_LEVELS - 1;
+ kvm->arch.invalid_ptes[0] = 0;
+ kvm->arch.invalid_ptes[1] = (unsigned long)invalid_pte_table;
+#if CONFIG_PGTABLE_LEVELS > 2
+ kvm->arch.invalid_ptes[2] = (unsigned long)invalid_pmd_table;
+#endif
+#if CONFIG_PGTABLE_LEVELS > 3
+ kvm->arch.invalid_ptes[3] = (unsigned long)invalid_pud_table;
+#endif
+ for (i = 0; i <= kvm->arch.root_level; i++)
+ kvm->arch.pte_shifts[i] = PAGE_SHIFT + i * (PAGE_SHIFT - 3);
+
+ return 0;
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+ kvm_destroy_vcpus(kvm);
+ free_page((unsigned long)kvm->arch.pgd);
+ kvm->arch.pgd = NULL;
+ kvfree(kvm->arch.phyid_map);
+ kvm->arch.phyid_map = NULL;
+}
+
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ONE_REG:
+ case KVM_CAP_ENABLE_CAP:
+ case KVM_CAP_READONLY_MEM:
+ case KVM_CAP_SYNC_MMU:
+ case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_MP_STATE:
+ case KVM_CAP_SET_GUEST_DEBUG:
+ r = 1;
+ break;
+ case KVM_CAP_NR_VCPUS:
+ r = num_online_cpus();
+ break;
+ case KVM_CAP_MAX_VCPUS:
+ r = KVM_MAX_VCPUS;
+ break;
+ case KVM_CAP_MAX_VCPU_ID:
+ r = KVM_MAX_VCPU_IDS;
+ break;
+ case KVM_CAP_NR_MEMSLOTS:
+ r = KVM_USER_MEM_SLOTS;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_vm_feature_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_LOONGARCH_VM_FEAT_LSX:
+ if (cpu_has_lsx)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_LASX:
+ if (cpu_has_lasx)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_X86BT:
+ if (cpu_has_lbt_x86)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_ARMBT:
+ if (cpu_has_lbt_arm)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_MIPSBT:
+ if (cpu_has_lbt_mips)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_PMU:
+ if (cpu_has_pmp)
+ return 0;
+ return -ENXIO;
+ case KVM_LOONGARCH_VM_FEAT_PV_IPI:
+ return 0;
+ case KVM_LOONGARCH_VM_FEAT_PV_STEALTIME:
+ if (kvm_pvtime_supported())
+ return 0;
+ return -ENXIO;
+ default:
+ return -ENXIO;
+ }
+}
+
+static int kvm_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_LOONGARCH_VM_FEAT_CTRL:
+ return kvm_vm_feature_has_attr(kvm, attr);
+ default:
+ return -ENXIO;
+ }
+}
+
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ struct kvm *kvm = filp->private_data;
+ struct kvm_device_attr attr;
+
+ switch (ioctl) {
+ case KVM_CREATE_IRQCHIP:
+ return 0;
+ case KVM_HAS_DEVICE_ATTR:
+ if (copy_from_user(&attr, argp, sizeof(attr)))
+ return -EFAULT;
+
+ return kvm_vm_has_attr(kvm, &attr);
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, bool line_status)
+{
+ if (!kvm_arch_irqchip_in_kernel(kvm))
+ return -ENXIO;
+
+ irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+ irq_event->irq, irq_event->level, line_status);
+
+ return 0;
+}
+
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+ return (kvm->arch.ipi && kvm->arch.eiointc && kvm->arch.pch_pic);
+}