aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-07-10 09:06:53 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-07-10 09:06:53 -0700
commit73d7cf07109e79b093d1a1fb57a88d4048cd9b4b (patch)
treeb472565858ee1de9c06ddaf6475cf78a78bd0b4e
parenteventpoll: don't decrement ep refcount while still holding the ep mutex (diff)
parentKVM: x86: avoid underflow when scaling TSC frequency (diff)
downloadwireguard-linux-73d7cf07109e79b093d1a1fb57a88d4048cd9b4b.tar.xz
wireguard-linux-73d7cf07109e79b093d1a1fb57a88d4048cd9b4b.zip
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini: "Many patches, pretty much all of them small, that accumulated while I was on vacation. ARM: - Remove the last leftovers of the ill-fated FPSIMD host state mapping at EL2 stage-1 - Fix unexpected advertisement to the guest of unimplemented S2 base granule sizes - Gracefully fail initialising pKVM if the interrupt controller isn't GICv3 - Also gracefully fail initialising pKVM if the carveout allocation fails - Fix the computing of the minimum MMIO range required for the host on stage-2 fault - Fix the generation of the GICv3 Maintenance Interrupt in nested mode x86: - Reject SEV{-ES} intra-host migration if one or more vCPUs are actively being created, so as not to create a non-SEV{-ES} vCPU in an SEV{-ES} VM - Use a pre-allocated, per-vCPU buffer for handling de-sparsification of vCPU masks in Hyper-V hypercalls; fixes a "stack frame too large" issue - Allow out-of-range/invalid Xen event channel ports when configuring IRQ routing, to avoid dictating a specific ioctl() ordering to userspace - Conditionally reschedule when setting memory attributes to avoid soft lockups when userspace converts huge swaths of memory to/from private - Add back MWAIT as a required feature for the MONITOR/MWAIT selftest - Add a missing field in struct sev_data_snp_launch_start that resulted in the guest-visible workarounds field being filled at the wrong offset - Skip non-canonical address when processing Hyper-V PV TLB flushes to avoid VM-Fail on INVVPID - Advertise supported TDX TDVMCALLs to userspace - Pass SetupEventNotifyInterrupt arguments to userspace - Fix TSC frequency underflow" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: x86: avoid underflow when scaling TSC frequency KVM: arm64: Remove kvm_arch_vcpu_run_map_fp() KVM: arm64: Fix handling of FEAT_GTG for unimplemented granule sizes KVM: arm64: Don't free hyp pages with pKVM on GICv2 KVM: arm64: Fix error path in init_hyp_mode() KVM: arm64: Adjust range correctly during host stage-2 faults KVM: arm64: nv: Fix MI line level calculation in vgic_v3_nested_update_mi() KVM: x86/hyper-v: Skip non-canonical addresses during PV TLB flush KVM: SVM: Add missing member in SNP_LAUNCH_START command structure Documentation: KVM: Fix unexpected unindent warnings KVM: selftests: Add back the missing check of MONITOR/MWAIT availability KVM: Allow CPU to reschedule while setting per-page memory attributes KVM: x86/xen: Allow 'out of range' event channel ports in IRQ routing table. KVM: x86/hyper-v: Use preallocated per-vCPU buffer for de-sparsified vCPU masks KVM: SVM: Initialize vmsa_pa in VMCB to INVALID_PAGE if VMSA page is NULL KVM: SVM: Reject SEV{-ES} intra host migration if vCPU creation is in-flight KVM: TDX: Report supported optional TDVMCALLs in TDX capabilities KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt
-rw-r--r--Documentation/virt/kvm/api.rst37
-rw-r--r--Documentation/virt/kvm/x86/intel-tdx.rst15
-rw-r--r--arch/arm64/include/asm/kvm_host.h1
-rw-r--r--arch/arm64/kvm/arm.c16
-rw-r--r--arch/arm64/kvm/fpsimd.c26
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c20
-rw-r--r--arch/arm64/kvm/nested.c26
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3-nested.c4
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/shared/tdx.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h8
-rw-r--r--arch/x86/kvm/hyperv.c5
-rw-r--r--arch/x86/kvm/svm/sev.c12
-rw-r--r--arch/x86/kvm/vmx/tdx.c30
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/kvm/xen.c15
-rw-r--r--include/linux/psp-sev.h2
-rw-r--r--include/uapi/linux/kvm.h4
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c1
-rw-r--r--virt/kvm/kvm_main.c3
20 files changed, 166 insertions, 71 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9abf93ee5f65..43ed57e048a8 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7196,6 +7196,10 @@ The valid value for 'flags' is:
u64 leaf;
u64 r11, r12, r13, r14;
} get_tdvmcall_info;
+ struct {
+ u64 ret;
+ u64 vector;
+ } setup_event_notify;
};
} tdx;
@@ -7210,21 +7214,24 @@ number from register R11. The remaining field of the union provide the
inputs and outputs of the TDVMCALL. Currently the following values of
``nr`` are defined:
-* ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
-signed by a service hosting TD-Quoting Enclave operating on the host.
-Parameters and return value are in the ``get_quote`` field of the union.
-The ``gpa`` field and ``size`` specify the guest physical address
-(without the shared bit set) and the size of a shared-memory buffer, in
-which the TDX guest passes a TD Report. The ``ret`` field represents
-the return value of the GetQuote request. When the request has been
-queued successfully, the TDX guest can poll the status field in the
-shared-memory area to check whether the Quote generation is completed or
-not. When completed, the generated Quote is returned via the same buffer.
-
-* ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
-status of TDVMCALLs. The output values for the given leaf should be
-placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
-field of the union.
+ * ``TDVMCALL_GET_QUOTE``: the guest has requested to generate a TD-Quote
+ signed by a service hosting TD-Quoting Enclave operating on the host.
+ Parameters and return value are in the ``get_quote`` field of the union.
+ The ``gpa`` field and ``size`` specify the guest physical address
+ (without the shared bit set) and the size of a shared-memory buffer, in
+ which the TDX guest passes a TD Report. The ``ret`` field represents
+ the return value of the GetQuote request. When the request has been
+ queued successfully, the TDX guest can poll the status field in the
+ shared-memory area to check whether the Quote generation is completed or
+ not. When completed, the generated Quote is returned via the same buffer.
+
+ * ``TDVMCALL_GET_TD_VM_CALL_INFO``: the guest has requested the support
+ status of TDVMCALLs. The output values for the given leaf should be
+ placed in fields from ``r11`` to ``r14`` of the ``get_tdvmcall_info``
+ field of the union.
+
+* ``TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT``: the guest has requested to
+set up a notification interrupt for vector ``vector``.
KVM may add support for more values in the future that may cause a userspace
exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
diff --git a/Documentation/virt/kvm/x86/intel-tdx.rst b/Documentation/virt/kvm/x86/intel-tdx.rst
index 76bdd95334d6..5efac62c92c7 100644
--- a/Documentation/virt/kvm/x86/intel-tdx.rst
+++ b/Documentation/virt/kvm/x86/intel-tdx.rst
@@ -79,7 +79,20 @@ to be configured to the TDX guest.
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ /* TDG.VP.VMCALL hypercalls executed in kernel and forwarded to
+ * userspace, respectively
+ */
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+
+ /* TDG.VP.VMCALL instruction executions subfunctions executed in kernel
+ * and forwarded to userspace, respectively
+ */
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d27079968341..3e41a880b062 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1480,7 +1480,6 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm,
struct reg_mask_range *range);
/* Guest/host FPSIMD coordination helpers */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 38a91bb5d4c7..23dd3f3fc3eb 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -825,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (!kvm_arm_vcpu_is_finalized(vcpu))
return -EPERM;
- ret = kvm_arch_vcpu_run_map_fp(vcpu);
- if (ret)
- return ret;
-
if (likely(vcpu_has_run_once(vcpu)))
return 0;
@@ -2129,7 +2125,7 @@ static void cpu_hyp_init(void *discard)
static void cpu_hyp_uninit(void *discard)
{
- if (__this_cpu_read(kvm_hyp_initialized)) {
+ if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) {
cpu_hyp_reset();
__this_cpu_write(kvm_hyp_initialized, 0);
}
@@ -2345,8 +2341,13 @@ static void __init teardown_hyp_mode(void)
free_hyp_pgds();
for_each_possible_cpu(cpu) {
+ if (per_cpu(kvm_hyp_initialized, cpu))
+ continue;
+
free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT);
- free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+ if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu])
+ continue;
if (free_sve) {
struct cpu_sve_state *sve_state;
@@ -2354,6 +2355,9 @@ static void __init teardown_hyp_mode(void)
sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
}
+
+ free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
}
}
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 8f6c8f57c6b9..15e17aca1dec 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -15,32 +15,6 @@
#include <asm/sysreg.h>
/*
- * Called on entry to KVM_RUN unless this vcpu previously ran at least
- * once and the most recent prior KVM_RUN for this vcpu was called from
- * the same task as current (highly likely).
- *
- * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu),
- * such that on entering hyp the relevant parts of current are already
- * mapped.
- */
-int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
-{
- struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
- int ret;
-
- /* pKVM has its own tracking of the host fpsimd state. */
- if (is_protected_kvm_enabled())
- return 0;
-
- /* Make sure the host task fpsimd state is visible to hyp: */
- ret = kvm_share_hyp(fpsimd, fpsimd + 1);
- if (ret)
- return ret;
-
- return 0;
-}
-
-/*
* Prepare vcpu for saving the host's FPSIMD state and loading the guest's.
* The actual loading is done by the FPSIMD access trap taken to hyp.
*
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 95d7534c9679..8957734d6183 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -479,6 +479,7 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
{
struct kvm_mem_range cur;
kvm_pte_t pte;
+ u64 granule;
s8 level;
int ret;
@@ -496,18 +497,21 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
return -EPERM;
}
- do {
- u64 granule = kvm_granule_size(level);
+ for (; level <= KVM_PGTABLE_LAST_LEVEL; level++) {
+ if (!kvm_level_supports_block_mapping(level))
+ continue;
+ granule = kvm_granule_size(level);
cur.start = ALIGN_DOWN(addr, granule);
cur.end = cur.start + granule;
- level++;
- } while ((level <= KVM_PGTABLE_LAST_LEVEL) &&
- !(kvm_level_supports_block_mapping(level) &&
- range_included(&cur, range)));
+ if (!range_included(&cur, range))
+ continue;
+ *range = cur;
+ return 0;
+ }
- *range = cur;
+ WARN_ON(1);
- return 0;
+ return -EINVAL;
}
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 5b191f4dc566..dc1d26559bfa 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1402,6 +1402,21 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
}
}
+#define has_tgran_2(__r, __sz) \
+ ({ \
+ u64 _s1, _s2, _mmfr0 = __r; \
+ \
+ _s2 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz##_2, _mmfr0); \
+ \
+ _s1 = SYS_FIELD_GET(ID_AA64MMFR0_EL1, \
+ TGRAN##__sz, _mmfr0); \
+ \
+ ((_s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_NI && \
+ _s2 != ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz) || \
+ (_s2 == ID_AA64MMFR0_EL1_TGRAN##__sz##_2_TGRAN##__sz && \
+ _s1 != ID_AA64MMFR0_EL1_TGRAN##__sz##_NI)); \
+ })
/*
* Our emulated CPU doesn't support all the possible features. For the
* sake of simplicity (and probably mental sanity), wipe out a number
@@ -1411,6 +1426,8 @@ static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
*/
u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
{
+ u64 orig_val = val;
+
switch (reg) {
case SYS_ID_AA64ISAR0_EL1:
/* Support everything but TME */
@@ -1480,13 +1497,16 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
*/
switch (PAGE_SIZE) {
case SZ_4K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
+ if (has_tgran_2(orig_val, 4))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
fallthrough;
case SZ_16K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
+ if (has_tgran_2(orig_val, 16))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
fallthrough;
case SZ_64K:
- val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
+ if (has_tgran_2(orig_val, 64))
+ val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
break;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
index a50fb7e6841f..679aafe77de2 100644
--- a/arch/arm64/kvm/vgic/vgic-v3-nested.c
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -401,9 +401,7 @@ void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
{
bool level;
- level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En;
- if (level)
- level &= vgic_v3_get_misr(vcpu);
+ level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu);
kvm_vgic_inject_irq(vcpu->kvm, vcpu,
vcpu->kvm->arch.vgic.mi_intid, level, vcpu);
}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fb01e456b624..f7af967aa16f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -700,8 +700,13 @@ struct kvm_vcpu_hv {
struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
- /* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+ /*
+ * Preallocated buffers for handling hypercalls that pass sparse vCPU
+ * sets (for high vCPU counts, they're too large to comfortably fit on
+ * the stack).
+ */
u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+ DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct hv_vp_assist_page vp_assist_page;
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index d8525e6ef50a..8bc074c8d7c6 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -72,6 +72,7 @@
#define TDVMCALL_MAP_GPA 0x10001
#define TDVMCALL_GET_QUOTE 0x10002
#define TDVMCALL_REPORT_FATAL_ERROR 0x10003
+#define TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT 0x10004ULL
/*
* TDG.VP.VMCALL Status Codes (returned in R10)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 6f3499507c5e..0f15d683817d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -965,7 +965,13 @@ struct kvm_tdx_cmd {
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 24f0318c50d7..ee27064dd72f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1979,6 +1979,9 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
goto out_flush_all;
+ if (is_noncanonical_invlpg_address(entries[i], vcpu))
+ continue;
+
/*
* Lower 12 bits of 'address' encode the number of additional
* pages to flush.
@@ -2001,11 +2004,11 @@ out_flush_all:
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+ unsigned long *vcpu_mask = hv_vcpu->vcpu_mask;
u64 *sparse_banks = hv_vcpu->sparse_banks;
struct kvm *kvm = vcpu->kvm;
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
- DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
/*
* Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 459c3b791fd4..b201f77fcd49 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1971,6 +1971,10 @@ static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
struct kvm_vcpu *src_vcpu;
unsigned long i;
+ if (src->created_vcpus != atomic_read(&src->online_vcpus) ||
+ dst->created_vcpus != atomic_read(&dst->online_vcpus))
+ return -EBUSY;
+
if (!sev_es_guest(src))
return 0;
@@ -4445,8 +4449,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
* the VMSA will be NULL if this vCPU is the destination for intrahost
* migration, and will be copied later.
*/
- if (svm->sev_es.vmsa && !svm->sev_es.snp_has_guest_vmsa)
- svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ if (!svm->sev_es.snp_has_guest_vmsa) {
+ if (svm->sev_es.vmsa)
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ else
+ svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ }
if (cpu_feature_enabled(X86_FEATURE_ALLOWED_SEV_FEATURES))
svm->vmcb->control.allowed_sev_features = sev->vmsa_features |
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 1ad20c273f3b..f31ccdeb905b 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -173,6 +173,9 @@ static void td_init_cpuid_entry2(struct kvm_cpuid_entry2 *entry, unsigned char i
tdx_clear_unsupported_cpuid(entry);
}
+#define TDVMCALLINFO_GET_QUOTE BIT(0)
+#define TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT BIT(1)
+
static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
struct kvm_tdx_capabilities *caps)
{
@@ -188,6 +191,10 @@ static int init_kvm_tdx_caps(const struct tdx_sys_info_td_conf *td_conf,
caps->cpuid.nent = td_conf->num_cpuid_config;
+ caps->user_tdvmcallinfo_1_r11 =
+ TDVMCALLINFO_GET_QUOTE |
+ TDVMCALLINFO_SETUP_EVENT_NOTIFY_INTERRUPT;
+
for (i = 0; i < td_conf->num_cpuid_config; i++)
td_init_cpuid_entry2(&caps->cpuid.entries[i], i);
@@ -1530,6 +1537,27 @@ static int tdx_get_quote(struct kvm_vcpu *vcpu)
return 0;
}
+static int tdx_setup_event_notify_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_tdx *tdx = to_tdx(vcpu);
+ u64 vector = tdx->vp_enter_args.r12;
+
+ if (vector < 32 || vector > 255) {
+ tdvmcall_set_return_code(vcpu, TDVMCALL_STATUS_INVALID_OPERAND);
+ return 1;
+ }
+
+ vcpu->run->exit_reason = KVM_EXIT_TDX;
+ vcpu->run->tdx.flags = 0;
+ vcpu->run->tdx.nr = TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT;
+ vcpu->run->tdx.setup_event_notify.ret = TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED;
+ vcpu->run->tdx.setup_event_notify.vector = vector;
+
+ vcpu->arch.complete_userspace_io = tdx_complete_simple;
+
+ return 0;
+}
+
static int handle_tdvmcall(struct kvm_vcpu *vcpu)
{
switch (tdvmcall_leaf(vcpu)) {
@@ -1541,6 +1569,8 @@ static int handle_tdvmcall(struct kvm_vcpu *vcpu)
return tdx_get_td_vm_call_info(vcpu);
case TDVMCALL_GET_QUOTE:
return tdx_get_quote(vcpu);
+ case TDVMCALL_SETUP_EVENT_NOTIFY_INTERRUPT:
+ return tdx_setup_event_notify_interrupt(vcpu);
default:
break;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a9d992d5652f..357b9e3a6cef 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3258,9 +3258,11 @@ int kvm_guest_time_update(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
- if (kvm_caps.has_tsc_control)
+ if (kvm_caps.has_tsc_control) {
tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
v->arch.l1_tsc_scaling_ratio);
+ tgt_tsc_khz = tgt_tsc_khz ? : 1;
+ }
if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 9b029bb29a16..5fa2cca43653 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1971,8 +1971,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
{
struct kvm_vcpu *vcpu;
- if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
- return -EINVAL;
+ /*
+ * Don't check for the port being within range of max_evtchn_port().
+ * Userspace can configure what ever targets it likes; events just won't
+ * be delivered if/while the target is invalid, just like userspace can
+ * configure MSIs which target non-existent APICs.
+ *
+ * This allow on Live Migration and Live Update, the IRQ routing table
+ * can be restored *independently* of other things like creating vCPUs,
+ * without imposing an ordering dependency on userspace. In this
+ * particular case, the problematic ordering would be with setting the
+ * Xen 'long mode' flag, which changes max_evtchn_port() to allow 4096
+ * instead of 1024 event channels.
+ */
/* We only support 2 level event channels for now */
if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h
index 0b3a36bdaa90..0f5f94137f6d 100644
--- a/include/linux/psp-sev.h
+++ b/include/linux/psp-sev.h
@@ -594,6 +594,7 @@ struct sev_data_snp_addr {
* @imi_en: launch flow is launching an IMI (Incoming Migration Image) for the
* purpose of guest-assisted migration.
* @rsvd: reserved
+ * @desired_tsc_khz: hypervisor desired mean TSC freq in kHz of the guest
* @gosvw: guest OS-visible workarounds, as defined by hypervisor
*/
struct sev_data_snp_launch_start {
@@ -603,6 +604,7 @@ struct sev_data_snp_launch_start {
u32 ma_en:1; /* In */
u32 imi_en:1; /* In */
u32 rsvd:30;
+ u32 desired_tsc_khz; /* In */
u8 gosvw[16]; /* In */
} __packed;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 37891580d05d..7a4c35ff03fe 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -467,6 +467,10 @@ struct kvm_run {
__u64 leaf;
__u64 r11, r12, r13, r14;
} get_tdvmcall_info;
+ struct {
+ __u64 ret;
+ __u64 vector;
+ } setup_event_notify;
};
} tdx;
/* Fix the size of the union. */
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 390ae2d87493..0eb371c62ab8 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -74,6 +74,7 @@ int main(int argc, char *argv[])
int testcase;
char test[80];
+ TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
ksft_print_header();
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eec82775c5bf..222f0e894a0c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2572,6 +2572,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
r = xa_reserve(&kvm->mem_attr_array, i, GFP_KERNEL_ACCOUNT);
if (r)
goto out_unlock;
+
+ cond_resched();
}
kvm_handle_gfn_range(kvm, &pre_set_range);
@@ -2580,6 +2582,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
r = xa_err(xa_store(&kvm->mem_attr_array, i, entry,
GFP_KERNEL_ACCOUNT));
KVM_BUG_ON(r, kvm);
+ cond_resched();
}
kvm_handle_gfn_range(kvm, &post_set_range);