aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/s390/kvm/kvm-s390.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm/kvm-s390.c')
-rw-r--r--arch/s390/kvm/kvm-s390.c601
1 files changed, 512 insertions, 89 deletions
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c2e6d4ba4e23..d05bb040fd42 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -44,6 +44,7 @@
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -184,6 +185,11 @@ static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -220,6 +226,7 @@ static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
@@ -228,13 +235,15 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return 0;
}
+/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
@@ -293,7 +302,7 @@ static struct notifier_block kvm_clock_notifier = {
.notifier_call = kvm_clock_sync,
};
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_pte_notifier(&gmap_notifier);
@@ -460,7 +469,12 @@ int kvm_arch_init(void *opaque)
if (!kvm_s390_dbf)
return -ENOMEM;
- if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view))
+ kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+ if (!kvm_s390_dbf_uv)
+ goto out;
+
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+ debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
goto out;
kvm_s390_cpu_feat_init();
@@ -487,6 +501,7 @@ void kvm_arch_exit(void)
{
kvm_s390_gib_destroy();
debug_unregister(kvm_s390_dbf);
+ debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
@@ -530,6 +545,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_AIS:
case KVM_CAP_S390_AIS_MIGRATION:
case KVM_CAP_S390_VCPU_RESETS:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
@@ -564,14 +580,16 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_BPB:
r = test_facility(82);
break;
+ case KVM_CAP_S390_PROTECTED:
+ r = is_prot_virt_host();
+ break;
default:
r = 0;
}
return r;
}
-static void kvm_s390_sync_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
int i;
gfn_t cur_gfn, last_gfn;
@@ -612,9 +630,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
{
int r;
unsigned long n;
- struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int is_dirty = 0;
+ int is_dirty;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -625,14 +642,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
- r = -ENOENT;
- if (!memslot->dirty_bitmap)
- goto out;
-
- kvm_s390_sync_dirty_log(kvm, memslot);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
@@ -1930,6 +1940,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
start = slot + 1;
}
+ if (start >= slots->used_slots)
+ return slots->used_slots - 1;
+
if (gfn >= memslots[start].base_gfn &&
gfn < memslots[start].base_gfn + memslots[start].npages) {
atomic_set(&slots->lru_slot, start);
@@ -1993,6 +2006,9 @@ static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
+ if (unlikely(!slots->used_slots))
+ return 0;
+
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
ms = gfn_to_memslot(kvm, cur_gfn);
args->count = 0;
@@ -2158,6 +2174,194 @@ out:
return r;
}
+static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+{
+ struct kvm_vcpu *vcpu;
+ u16 rc, rrc;
+ int ret = 0;
+ int i;
+
+ /*
+ * We ignore failures and try to destroy as many CPUs as possible.
+ * At the same time we must not free the assigned resources when
+ * this fails, as the ultravisor has still access to that memory.
+ * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+ * behind.
+ * We want to return the first failure rc and rrc, though.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
+ *rcp = rc;
+ *rrcp = rrc;
+ ret = -EIO;
+ }
+ mutex_unlock(&vcpu->mutex);
+ }
+ return ret;
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int i, r = 0;
+ u16 dummy;
+
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+ mutex_unlock(&vcpu->mutex);
+ if (r)
+ break;
+ }
+ if (r)
+ kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+ return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+ int r = 0;
+ u16 dummy;
+ void __user *argp = (void __user *)cmd->data;
+
+ switch (cmd->cmd) {
+ case KVM_PV_ENABLE: {
+ r = -EINVAL;
+ if (kvm_s390_pv_is_protected(kvm))
+ break;
+
+ /*
+ * FMT 4 SIE needs esca. As we never switch back to bsca from
+ * esca, we need no cleanup in the error cases below
+ */
+ r = sca_switch_to_extended(kvm);
+ if (r)
+ break;
+
+ down_write(&current->mm->mmap_sem);
+ r = gmap_mark_unmergeable();
+ up_write(&current->mm->mmap_sem);
+ if (r)
+ break;
+
+ r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ break;
+
+ r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+ /* we need to block service interrupts from now on */
+ set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_DISABLE: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+ /*
+ * If a CPU could not be destroyed, destroy VM will also fail.
+ * There is no point in trying to destroy it. Instead return
+ * the rc and rrc from the first CPU that failed destroying.
+ */
+ if (r)
+ break;
+ r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+
+ /* no need to block service interrupts any more */
+ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_SET_SEC_PARMS: {
+ struct kvm_s390_pv_sec_parm parms = {};
+ void *hdr;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&parms, argp, sizeof(parms)))
+ break;
+
+ /* Currently restricted to 8KB */
+ r = -EINVAL;
+ if (parms.length > PAGE_SIZE * 2)
+ break;
+
+ r = -ENOMEM;
+ hdr = vmalloc(parms.length);
+ if (!hdr)
+ break;
+
+ r = -EFAULT;
+ if (!copy_from_user(hdr, (void __user *)parms.origin,
+ parms.length))
+ r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+ &cmd->rc, &cmd->rrc);
+
+ vfree(hdr);
+ break;
+ }
+ case KVM_PV_UNPACK: {
+ struct kvm_s390_pv_unp unp = {};
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&unp, argp, sizeof(unp)))
+ break;
+
+ r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_VERIFY: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+ cmd->rrc);
+ break;
+ }
+ case KVM_PV_PREP_RESET: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ case KVM_PV_UNSHARE_ALL: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2255,6 +2459,33 @@ long kvm_arch_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->slots_lock);
break;
}
+ case KVM_S390_PV_COMMAND: {
+ struct kvm_pv_cmd args;
+
+ /* protvirt means user sigp */
+ kvm->arch.user_cpu_state_ctrl = 1;
+ r = 0;
+ if (!is_prot_virt_host()) {
+ r = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ if (args.flags) {
+ r = -EINVAL;
+ break;
+ }
+ mutex_lock(&kvm->lock);
+ r = kvm_s390_handle_pv(kvm, &args);
+ mutex_unlock(&kvm->lock);
+ if (copy_to_user(argp, &args, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -2504,7 +2735,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.use_skf = sclp.has_skey;
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
- kvm_s390_gisa_init(kvm);
+ if (use_gisa)
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2518,6 +2750,8 @@ out_err:
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
+ u16 rc, rrc;
+
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
@@ -2530,6 +2764,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
+ /* We can not hold the vcpu mutex here, we are already dying */
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
free_page((unsigned long)(vcpu->arch.sie_block));
}
@@ -2551,10 +2788,20 @@ static void kvm_free_vcpus(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ u16 rc, rrc;
+
kvm_free_vcpus(kvm);
sca_dispose(kvm);
- debug_unregister(kvm->arch.dbf);
kvm_s390_gisa_destroy(kvm);
+ /*
+ * We are already at the end of life and kvm->lock is not taken.
+ * This is ok as the file descriptor is closed by now and nobody
+ * can mess with the pv state. To avoid lockdep_assert_held from
+ * complaining we do not use kvm_s390_pv_is_protected.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2650,6 +2897,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
unsigned int vcpu_idx;
u32 scaol, scaoh;
+ if (kvm->arch.use_esca)
+ return 0;
+
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
@@ -2901,6 +3151,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ u16 uvrc, uvrrc;
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
@@ -2968,6 +3219,14 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_crypto_setup(vcpu);
+ mutex_lock(&vcpu->kvm->lock);
+ if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+ rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+ if (rc)
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
+ }
+ mutex_unlock(&vcpu->kvm->lock);
+
return rc;
}
@@ -3277,7 +3536,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0;
- vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
@@ -3295,9 +3553,17 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
vcpu->run->s.regs.pp = 0;
vcpu->run->s.regs.gbea = 1;
vcpu->run->s.regs.fpc = 0;
- vcpu->arch.sie_block->gbea = 1;
- vcpu->arch.sie_block->pp = 0;
- vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ /*
+ * Do not reset these registers in the protected case, as some of
+ * them are overlayed and they are not accessible in this case
+ * anyway.
+ */
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->todpr = 0;
+ }
}
static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
@@ -3487,14 +3753,20 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
- kvm_s390_vcpu_stop(vcpu);
+ rc = kvm_s390_vcpu_stop(vcpu);
break;
case KVM_MP_STATE_OPERATING:
- kvm_s390_vcpu_start(vcpu);
+ rc = kvm_s390_vcpu_start(vcpu);
break;
case KVM_MP_STATE_LOAD:
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ rc = -ENXIO;
+ break;
+ }
+ rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+ break;
case KVM_MP_STATE_CHECK_STOP:
- /* fall through - CHECK_STOP and LOAD are not supported yet */
+ fallthrough; /* CHECK_STOP and LOAD are not supported yet */
default:
rc = -ENXIO;
}
@@ -3844,9 +4116,11 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
return vcpu_post_run_fault_in_sie(vcpu);
}
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
+ struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
/*
* We try to hold kvm->srcu during most of vcpu_run (except when run-
@@ -3868,8 +4142,28 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
guest_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(sie_page->pv_grregs,
+ vcpu->run->s.regs.gprs,
+ sizeof(sie_page->pv_grregs));
+ }
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(vcpu->run->s.regs.gprs,
+ sie_page->pv_grregs,
+ sizeof(sie_page->pv_grregs));
+ /*
+ * We're not allowed to inject interrupts on intercepts
+ * that leave the guest state in an "in-between" state
+ * where the next SIE entry will do a continuation.
+ * Fence interrupts in our "internal" PSW.
+ */
+ if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+ vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ }
+ }
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
@@ -3883,7 +4177,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
@@ -3892,16 +4186,7 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
- memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
- /* some control register changes require a tlb flush */
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
- vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
@@ -3942,6 +4227,36 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (current->thread.gs_cb) {
+ vcpu->arch.host_gscb = current->thread.gs_cb;
+ save_gs_cb(vcpu->arch.host_gscb);
+ }
+ if (vcpu->arch.gs_enabled) {
+ current->thread.gs_cb = (struct gs_cb *)
+ &vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ }
+ preempt_enable();
+ }
+ /* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ }
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
/* save host (userspace) fprs/vrs */
@@ -3956,23 +4271,47 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
+
+ /* Sync fmt2 only data */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+ sync_regs_fmt2(vcpu, kvm_run);
+ } else {
+ /*
+ * In several places we have to modify our internal view to
+ * not do things that are disallowed by the ultravisor. For
+ * example we must not inject interrupts after specific exits
+ * (e.g. 112 prefix page not secure). We do this by turning
+ * off the machine check, external and I/O interrupt bits
+ * of our PSW copy. To avoid getting validity intercepts, we
+ * do only accept the condition code from userspace.
+ */
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+ vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+ PSW_MASK_CC;
+ }
+
+ kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs_fmt2(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+ kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+ kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
if (MACHINE_HAS_GS) {
- preempt_disable();
__ctl_set_bit(2, 4);
- if (current->thread.gs_cb) {
- vcpu->arch.host_gscb = current->thread.gs_cb;
- save_gs_cb(vcpu->arch.host_gscb);
- }
- if (vcpu->arch.gs_enabled) {
- current->thread.gs_cb = (struct gs_cb *)
- &vcpu->run->s.regs.gscb;
- restore_gs_cb(current->thread.gs_cb);
- }
+ if (vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+ preempt_disable();
+ current->thread.gs_cb = vcpu->arch.host_gscb;
+ restore_gs_cb(vcpu->arch.host_gscb);
preempt_enable();
+ if (!vcpu->arch.host_gscb)
+ __ctl_clear_bit(2, 4);
+ vcpu->arch.host_gscb = NULL;
}
- /* SIE will load etoken directly from SDNX and therefore kvm_run */
-
- kvm_run->kvm_dirty_regs = 0;
+ /* SIE will save etoken directly into SDNX and therefore kvm_run */
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -3983,13 +4322,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
- kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
- kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
- kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
- kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
/* Save guest register state */
@@ -3998,19 +4333,8 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Restore will be done lazily at return */
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
- if (MACHINE_HAS_GS) {
- __ctl_set_bit(2, 4);
- if (vcpu->arch.gs_enabled)
- save_gs_cb(current->thread.gs_cb);
- preempt_disable();
- current->thread.gs_cb = vcpu->arch.host_gscb;
- restore_gs_cb(vcpu->arch.host_gscb);
- preempt_enable();
- if (!vcpu->arch.host_gscb)
- __ctl_clear_bit(2, 4);
- vcpu->arch.host_gscb = NULL;
- }
- /* SIE will save etoken directly into SDNX and therefore kvm_run */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+ store_regs_fmt2(vcpu, kvm_run);
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -4034,6 +4358,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_sigset_activate(vcpu);
+ /*
+ * no need to check the return value of vcpu_start as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
@@ -4171,18 +4499,27 @@ static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
if (!is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the operating state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
for (i = 0; i < online_vcpus; i++) {
if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
started_vcpus++;
@@ -4202,27 +4539,43 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
+ * The real PSW might have changed due to a RESTART interpreted by the
+ * ultravisor. We block all interrupts and let the next sie exit
+ * refresh our view.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ /*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
struct kvm_vcpu *started_vcpu = NULL;
if (is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the stopped state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
@@ -4245,7 +4598,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
}
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
@@ -4272,12 +4625,40 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ int r = 0;
+
+ if (mop->flags || !mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset < mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+ return -E2BIG;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_SIDA_READ:
+ if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), mop->size))
+ r = -EFAULT;
+
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), uaddr, mop->size))
+ r = -EFAULT;
+ break;
+ }
+ return r;
+}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
- int r, srcu_idx;
+ int r = 0;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
| KVM_S390_MEMOP_F_CHECK_ONLY;
@@ -4287,14 +4668,15 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
@@ -4320,12 +4702,8 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
}
r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
break;
- default:
- r = -EINVAL;
}
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
@@ -4333,6 +4711,31 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ int r, srcu_idx;
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ r = kvm_s390_guest_mem_op(vcpu, mop);
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ /* we are locked against sida going away by the vcpu->mutex */
+ r = kvm_s390_guest_sida_op(vcpu, mop);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ return r;
+}
+
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4368,6 +4771,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
void __user *argp = (void __user *)arg;
int idx;
long r;
+ u16 rc, rrc;
vcpu_load(vcpu);
@@ -4389,18 +4793,40 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_S390_CLEAR_RESET:
r = 0;
kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_INITIAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_INITIAL,
+ &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_S390_NORMAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -EINVAL;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ break;
r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
break;
@@ -4463,7 +4889,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
- r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+ r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
@@ -4523,12 +4949,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -4549,12 +4969,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
return -EINVAL;
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -4570,7 +4993,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
old->npages * PAGE_SIZE);
if (rc)
break;
- /* FALLTHROUGH */
+ fallthrough;
case KVM_MR_CREATE:
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);