aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/virtual/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/virtual/kvm')
-rw-r--r--Documentation/virtual/kvm/amd-memory-encryption.rst3
-rw-r--r--Documentation/virtual/kvm/api.txt78
-rw-r--r--Documentation/virtual/kvm/arm/psci.txt31
-rw-r--r--Documentation/virtual/kvm/cpuid.rst107
-rw-r--r--Documentation/virtual/kvm/cpuid.txt83
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic-its.txt2
-rw-r--r--Documentation/virtual/kvm/hypercalls.txt11
-rw-r--r--Documentation/virtual/kvm/index.rst11
-rw-r--r--Documentation/virtual/kvm/locking.txt4
-rw-r--r--Documentation/virtual/kvm/msr.txt9
10 files changed, 235 insertions, 104 deletions
diff --git a/Documentation/virtual/kvm/amd-memory-encryption.rst b/Documentation/virtual/kvm/amd-memory-encryption.rst
index 659bbc093b52..d18c97b4e140 100644
--- a/Documentation/virtual/kvm/amd-memory-encryption.rst
+++ b/Documentation/virtual/kvm/amd-memory-encryption.rst
@@ -241,6 +241,9 @@ Returns: 0 on success, -negative on error
References
==========
+
+See [white-paper]_, [api-spec]_, [amd-apm]_ and [kvm-forum]_ for more info.
+
.. [white-paper] http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
.. [api-spec] http://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf
.. [amd-apm] http://support.amd.com/TechDocs/24593.pdf (section 15.34)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..2cd6250b2896 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1079,7 +1079,7 @@ yet and must be cleared on entry.
4.35 KVM_SET_USER_MEMORY_REGION
-Capability: KVM_CAP_USER_MEM
+Capability: KVM_CAP_USER_MEMORY
Architectures: all
Type: vm ioctl
Parameters: struct kvm_userspace_memory_region (in)
@@ -2205,7 +2205,7 @@ max_vq. This is the maximum vector length available to the guest on
this vcpu, and determines which register slices are visible through
this ioctl interface.
-(See Documentation/arm64/sve.txt for an explanation of the "vq"
+(See Documentation/arm64/sve.rst for an explanation of the "vq"
nomenclature.)
KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT.
@@ -3857,43 +3857,59 @@ Type: vcpu ioctl
Parameters: struct kvm_nested_state (in/out)
Returns: 0 on success, -1 on error
Errors:
- E2BIG: the total state size (including the fixed-size part of struct
- kvm_nested_state) exceeds the value of 'size' specified by
+ E2BIG: the total state size exceeds the value of 'size' specified by
the user; the size required will be written into size.
struct kvm_nested_state {
__u16 flags;
__u16 format;
__u32 size;
+
union {
- struct kvm_vmx_nested_state vmx;
- struct kvm_svm_nested_state svm;
+ struct kvm_vmx_nested_state_hdr vmx;
+ struct kvm_svm_nested_state_hdr svm;
+
+ /* Pad the header to 128 bytes. */
__u8 pad[120];
- };
- __u8 data[0];
+ } hdr;
+
+ union {
+ struct kvm_vmx_nested_state_data vmx[0];
+ struct kvm_svm_nested_state_data svm[0];
+ } data;
};
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
+#define KVM_STATE_NESTED_EVMCS 0x00000004
-#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
-#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
+#define KVM_STATE_NESTED_FORMAT_VMX 0
+#define KVM_STATE_NESTED_FORMAT_SVM 1
-struct kvm_vmx_nested_state {
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000
+
+#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE 0x00000001
+#define KVM_STATE_NESTED_VMX_SMM_VMXON 0x00000002
+
+struct kvm_vmx_nested_state_hdr {
__u64 vmxon_pa;
- __u64 vmcs_pa;
+ __u64 vmcs12_pa;
struct {
__u16 flags;
} smm;
};
+struct kvm_vmx_nested_state_data {
+ __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
This ioctl copies the vcpu's nested virtualization state from the kernel to
userspace.
-The maximum size of the state, including the fixed-size part of struct
-kvm_nested_state, can be retrieved by passing KVM_CAP_NESTED_STATE to
-the KVM_CHECK_EXTENSION ioctl().
+The maximum size of the state can be retrieved by passing KVM_CAP_NESTED_STATE
+to the KVM_CHECK_EXTENSION ioctl().
4.115 KVM_SET_NESTED_STATE
@@ -3903,8 +3919,8 @@ Type: vcpu ioctl
Parameters: struct kvm_nested_state (in)
Returns: 0 on success, -1 on error
-This copies the vcpu's kvm_nested_state struct from userspace to the kernel. For
-the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
+This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
+For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
4.116 KVM_(UN)REGISTER_COALESCED_MMIO
@@ -4065,6 +4081,32 @@ KVM_ARM_VCPU_FINALIZE call.
See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
using this ioctl.
+4.120 KVM_SET_PMU_EVENT_FILTER
+
+Capability: KVM_CAP_PMU_EVENT_FILTER
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pmu_event_filter (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u64 events[0];
+};
+
+This ioctl restricts the set of PMU events that the guest can program.
+The argument holds a list of events which will be allowed or denied.
+The eventsel+umask of each event the guest attempts to program is compared
+against the events field to determine whether the guest should have access.
+This only affects general purpose counters; fixed purpose counters can
+be disabled by changing the perfmon CPUID leaf.
+
+Valid values for 'action':
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
+
5. The kvm_run structure
------------------------
@@ -4893,6 +4935,8 @@ Valid bits in args[0] are
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
+#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virtual/kvm/arm/psci.txt
index aafdab887b04..559586fc9d37 100644
--- a/Documentation/virtual/kvm/arm/psci.txt
+++ b/Documentation/virtual/kvm/arm/psci.txt
@@ -28,3 +28,34 @@ The following register is defined:
- Allows any PSCI version implemented by KVM and compatible with
v0.2 to be set with SET_ONE_REG
- Affects the whole VM (even if the register view is per-vcpu)
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+ Holds the state of the firmware support to mitigate CVE-2017-5715, as
+ offered by KVM to the guest via a HVC call. The workaround is described
+ under SMCCC_ARCH_WORKAROUND_1 in [1].
+ Accepted values are:
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
+ firmware support for the workaround. The mitigation status for the
+ guest is unknown.
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
+ available to the guest and required for the mitigation.
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
+ is available to the guest, but it is not needed on this VCPU.
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+ Holds the state of the firmware support to mitigate CVE-2018-3639, as
+ offered by KVM to the guest via a HVC call. The workaround is described
+ under SMCCC_ARCH_WORKAROUND_2 in [1].
+ Accepted values are:
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
+ available. KVM does not offer firmware support for the workaround.
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
+ unknown. KVM does not offer firmware support for the workaround.
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
+ and can be disabled by a vCPU. If
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
+ this vCPU.
+ KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
+ always active on this vCPU or it is not needed.
+
+[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
diff --git a/Documentation/virtual/kvm/cpuid.rst b/Documentation/virtual/kvm/cpuid.rst
new file mode 100644
index 000000000000..01b081f6e7ea
--- /dev/null
+++ b/Documentation/virtual/kvm/cpuid.rst
@@ -0,0 +1,107 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+KVM CPUID bits
+==============
+
+:Author: Glauber Costa <glommer@gmail.com>
+
+A guest running on a kvm host, can check some of its features using
+cpuid. This is not always guaranteed to work, since userspace can
+mask-out some, or even all KVM-related cpuid features before launching
+a guest.
+
+KVM cpuid functions are:
+
+function: KVM_CPUID_SIGNATURE (0x40000000)
+
+returns::
+
+ eax = 0x40000001
+ ebx = 0x4b4d564b
+ ecx = 0x564b4d56
+ edx = 0x4d
+
+Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
+The value in eax corresponds to the maximum cpuid function present in this leaf,
+and will be updated if more functions are added in the future.
+Note also that old hosts set eax value to 0x0. This should
+be interpreted as if the value was 0x40000001.
+This function queries the presence of KVM cpuid leafs.
+
+function: define KVM_CPUID_FEATURES (0x40000001)
+
+returns::
+
+ ebx, ecx
+ eax = an OR'ed group of (1 << flag)
+
+where ``flag`` is defined as below:
+
+================================= =========== ================================
+flag value meaning
+================================= =========== ================================
+KVM_FEATURE_CLOCKSOURCE 0 kvmclock available at msrs
+ 0x11 and 0x12
+
+KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays
+ on PIO operations
+
+KVM_FEATURE_MMU_OP 2 deprecated
+
+KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs
+
+ 0x4b564d00 and 0x4b564d01
+KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by
+ writing to msr 0x4b564d02
+
+KVM_FEATURE_STEAL_TIME 5 steal time can be enabled by
+ writing to msr 0x4b564d03
+
+KVM_FEATURE_PV_EOI 6 paravirtualized end of interrupt
+ handler can be enabled by
+ writing to msr 0x4b564d04
+
+KVM_FEATURE_PV_UNHAULT 7 guest checks this feature bit
+ before enabling paravirtualized
+ spinlock support
+
+KVM_FEATURE_PV_TLB_FLUSH 9 guest checks this feature bit
+ before enabling paravirtualized
+ tlb flush
+
+KVM_FEATURE_ASYNC_PF_VMEXIT 10 paravirtualized async PF VM EXIT
+ can be enabled by setting bit 2
+ when writing to msr 0x4b564d02
+
+KVM_FEATURE_PV_SEND_IPI 11 guest checks this feature bit
+ before enabling paravirtualized
+ sebd IPIs
+
+KVM_FEATURE_PV_POLL_CONTROL 12 host-side polling on HLT can
+ be disabled by writing
+ to msr 0x4b564d05.
+
+KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
+ before using paravirtualized
+ sched yield.
+
+KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side
+ per-cpu warps are expeced in
+ kvmclock
+================================= =========== ================================
+
+::
+
+ edx = an OR'ed group of (1 << flag)
+
+Where ``flag`` here is defined as below:
+
+================== ============ =================================
+flag value meaning
+================== ============ =================================
+KVM_HINTS_REALTIME 0 guest checks this feature bit to
+ determine that vCPUs are never
+ preempted for an unlimited time
+ allowing optimizations
+================== ============ =================================
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
deleted file mode 100644
index 97ca1940a0dc..000000000000
--- a/Documentation/virtual/kvm/cpuid.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-KVM CPUID bits
-Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
-=====================================================
-
-A guest running on a kvm host, can check some of its features using
-cpuid. This is not always guaranteed to work, since userspace can
-mask-out some, or even all KVM-related cpuid features before launching
-a guest.
-
-KVM cpuid functions are:
-
-function: KVM_CPUID_SIGNATURE (0x40000000)
-returns : eax = 0x40000001,
- ebx = 0x4b4d564b,
- ecx = 0x564b4d56,
- edx = 0x4d.
-Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
-The value in eax corresponds to the maximum cpuid function present in this leaf,
-and will be updated if more functions are added in the future.
-Note also that old hosts set eax value to 0x0. This should
-be interpreted as if the value was 0x40000001.
-This function queries the presence of KVM cpuid leafs.
-
-
-function: define KVM_CPUID_FEATURES (0x40000001)
-returns : ebx, ecx
- eax = an OR'ed group of (1 << flag), where each flags is:
-
-
-flag || value || meaning
-=============================================================================
-KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs
- || || 0x11 and 0x12.
-------------------------------------------------------------------------------
-KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays
- || || on PIO operations.
-------------------------------------------------------------------------------
-KVM_FEATURE_MMU_OP || 2 || deprecated.
-------------------------------------------------------------------------------
-KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
- || || 0x4b564d00 and 0x4b564d01
-------------------------------------------------------------------------------
-KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by
- || || writing to msr 0x4b564d02
-------------------------------------------------------------------------------
-KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by
- || || writing to msr 0x4b564d03.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt
- || || handler can be enabled by writing
- || || to msr 0x4b564d04.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
- || || before enabling paravirtualized
- || || spinlock support.
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit
- || || before enabling paravirtualized
- || || tlb flush.
-------------------------------------------------------------------------------
-KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
- || || can be enabled by setting bit 2
- || || when writing to msr 0x4b564d02
-------------------------------------------------------------------------------
-KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
- || || before using paravirtualized
- || || send IPIs.
-------------------------------------------------------------------------------
-KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
- || || per-cpu warps are expected in
- || || kvmclock.
-------------------------------------------------------------------------------
-
- edx = an OR'ed group of (1 << flag), where each flags is:
-
-
-flag || value || meaning
-==================================================================================
-KVM_HINTS_REALTIME || 0 || guest checks this feature bit to
- || || determine that vCPUs are never
- || || preempted for an unlimited time,
- || || allowing optimizations
-----------------------------------------------------------------------------------
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index 4f0c9fc40365..eeaa95b893a8 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -103,7 +103,7 @@ Groups:
The following ordering must be followed when restoring the GIC and the ITS:
a) restore all guest memory and create vcpus
b) restore all redistributors
-c) provide the its base address
+c) provide the ITS base address
(KVM_DEV_ARM_VGIC_GRP_ADDR)
d) restore the ITS in the following order:
1. Restore GITS_CBASER
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index da24c138c8d1..da210651f714 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -141,3 +141,14 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
+
+7. KVM_HC_SCHED_YIELD
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to yield if the IPI target vCPU is preempted
+
+a0: destination APIC ID
+
+Usage example: When sending a call-function IPI-many to vCPUs, yield if
+any of the IPI target vCPUs was preempted.
diff --git a/Documentation/virtual/kvm/index.rst b/Documentation/virtual/kvm/index.rst
new file mode 100644
index 000000000000..0b206a06f5be
--- /dev/null
+++ b/Documentation/virtual/kvm/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+KVM
+===
+
+.. toctree::
+ :maxdepth: 2
+
+ amd-memory-encryption
+ cpuid
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 1bb8bcaf8497..635cd6eaf714 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows:
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
-For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
-
Everything else is a leaf: no other lock is taken inside the critical
sections.
@@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above.
------------
Name: kvm_lock
-Type: spinlock_t
+Type: mutex
Arch: any
Protects: - vm_list
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index f3f0d57ced8e..df1f4338b3ca 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -273,3 +273,12 @@ MSR_KVM_EOI_EN: 0x4b564d04
guest must both read the least significant bit in the memory area and
clear it using a single CPU instruction, such as test and clear, or
compare and exchange.
+
+MSR_KVM_POLL_CONTROL: 0x4b564d05
+ Control host-side polling.
+
+ data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
+
+ KVM guests can request the host not to poll on HLT, for example if
+ they are performing polling themselves.
+