aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/s390/kvm/gaccess.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm/gaccess.c')
-rw-r--r--arch/s390/kvm/gaccess.c518
1 files changed, 259 insertions, 259 deletions
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 227ed0009354..21c2e61fece4 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -11,169 +11,14 @@
#include <linux/err.h>
#include <linux/pgtable.h>
#include <linux/bitfield.h>
-
+#include <asm/access-regs.h>
+#include <asm/fault.h>
#include <asm/gmap.h>
+#include <asm/dat-bits.h>
#include "kvm-s390.h"
#include "gaccess.h"
-#include <asm/switch_to.h>
-
-union asce {
- unsigned long val;
- struct {
- unsigned long origin : 52; /* Region- or Segment-Table Origin */
- unsigned long : 2;
- unsigned long g : 1; /* Subspace Group Control */
- unsigned long p : 1; /* Private Space Control */
- unsigned long s : 1; /* Storage-Alteration-Event Control */
- unsigned long x : 1; /* Space-Switch-Event Control */
- unsigned long r : 1; /* Real-Space Control */
- unsigned long : 1;
- unsigned long dt : 2; /* Designation-Type Control */
- unsigned long tl : 2; /* Region- or Segment-Table Length */
- };
-};
-
-enum {
- ASCE_TYPE_SEGMENT = 0,
- ASCE_TYPE_REGION3 = 1,
- ASCE_TYPE_REGION2 = 2,
- ASCE_TYPE_REGION1 = 3
-};
-union region1_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Second-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Second-Table Length */
- };
-};
-
-union region2_table_entry {
- unsigned long val;
- struct {
- unsigned long rto: 52;/* Region-Table Origin */
- unsigned long : 2;
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Region-Third-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long : 1;
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Region-Third-Table Length */
- };
-};
-
-struct region3_table_entry_fc0 {
- unsigned long sto: 52;/* Segment-Table Origin */
- unsigned long : 1;
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 1;
- unsigned long tf : 2; /* Segment-Table Offset */
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long tl : 2; /* Segment-Table Length */
-};
-
-struct region3_table_entry_fc1 {
- unsigned long rfaa : 33; /* Region-Frame Absolute Address */
- unsigned long : 14;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union region3_table_entry {
- unsigned long val;
- struct region3_table_entry_fc0 fc0;
- struct region3_table_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Region-Invalid Bit */
- unsigned long cr : 1; /* Common-Region Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-struct segment_entry_fc0 {
- unsigned long pto: 53;/* Page-Table Origin */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long : 3;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-struct segment_entry_fc1 {
- unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
- unsigned long : 3;
- unsigned long av : 1; /* ACCF-Validity Control */
- unsigned long acc: 4; /* Access-Control Bits */
- unsigned long f : 1; /* Fetch-Protection Bit */
- unsigned long fc : 1; /* Format-Control */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 2;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
-};
-
-union segment_table_entry {
- unsigned long val;
- struct segment_entry_fc0 fc0;
- struct segment_entry_fc1 fc1;
- struct {
- unsigned long : 53;
- unsigned long fc : 1; /* Format-Control */
- unsigned long : 4;
- unsigned long i : 1; /* Segment-Invalid Bit */
- unsigned long cs : 1; /* Common-Segment Bit */
- unsigned long tt : 2; /* Table-Type Bits */
- unsigned long : 2;
- };
-};
-
-enum {
- TABLE_TYPE_SEGMENT = 0,
- TABLE_TYPE_REGION3 = 1,
- TABLE_TYPE_REGION2 = 2,
- TABLE_TYPE_REGION1 = 3
-};
-
-union page_table_entry {
- unsigned long val;
- struct {
- unsigned long pfra : 52; /* Page-Frame Real Address */
- unsigned long z : 1; /* Zero Bit */
- unsigned long i : 1; /* Page-Invalid Bit */
- unsigned long p : 1; /* DAT-Protection Bit */
- unsigned long iep: 1; /* Instruction-Execution-Protection */
- unsigned long : 8;
- };
-};
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
/*
* vaddress union in order to easily decode a virtual address into its
@@ -262,119 +107,119 @@ struct aste {
/* .. more fields there */
};
-int ipte_lock_held(struct kvm_vcpu *vcpu)
+int ipte_lock_held(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII) {
+ if (sclp.has_siif) {
int rc;
- read_lock(&vcpu->kvm->arch.sca_lock);
- rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0;
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_lock(&kvm->arch.sca_lock);
+ rc = kvm_s390_get_ipte_control(kvm)->kh != 0;
+ read_unlock(&kvm->arch.sca_lock);
return rc;
}
- return vcpu->kvm->arch.ipte_lock_count != 0;
+ return kvm->arch.ipte_lock_count != 0;
}
-static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+static void ipte_lock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count++;
- if (vcpu->kvm->arch.ipte_lock_count > 1)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count++;
+ if (kvm->arch.ipte_lock_count > 1)
goto out;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.k) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+static void ipte_unlock_simple(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- mutex_lock(&vcpu->kvm->arch.ipte_mutex);
- vcpu->kvm->arch.ipte_lock_count--;
- if (vcpu->kvm->arch.ipte_lock_count)
+ mutex_lock(&kvm->arch.ipte_mutex);
+ kvm->arch.ipte_lock_count--;
+ if (kvm->arch.ipte_lock_count)
goto out;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
+ wake_up(&kvm->arch.ipte_wq);
out:
- mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
+ mutex_unlock(&kvm->arch.ipte_mutex);
}
-static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+static void ipte_lock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
retry:
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.kg) {
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ read_unlock(&kvm->arch.sca_lock);
cond_resched();
goto retry;
}
new = old;
new.k = 1;
new.kh++;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
}
-static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+static void ipte_unlock_siif(struct kvm *kvm)
{
union ipte_control old, new, *ic;
- read_lock(&vcpu->kvm->arch.sca_lock);
- ic = kvm_s390_get_ipte_control(vcpu->kvm);
+ read_lock(&kvm->arch.sca_lock);
+ ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.kh--;
if (!new.kh)
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- read_unlock(&vcpu->kvm->arch.sca_lock);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
+ read_unlock(&kvm->arch.sca_lock);
if (!new.kh)
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ wake_up(&kvm->arch.ipte_wq);
}
-void ipte_lock(struct kvm_vcpu *vcpu)
+void ipte_lock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_lock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_lock_siif(kvm);
else
- ipte_lock_simple(vcpu);
+ ipte_lock_simple(kvm);
}
-void ipte_unlock(struct kvm_vcpu *vcpu)
+void ipte_unlock(struct kvm *kvm)
{
- if (vcpu->arch.sie_block->eca & ECA_SII)
- ipte_unlock_siif(vcpu);
+ if (sclp.has_siif)
+ ipte_unlock_siif(kvm);
else
- ipte_unlock_simple(vcpu);
+ ipte_unlock_simple(kvm);
}
static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
@@ -391,7 +236,8 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
if (ar >= NUM_ACRS)
return -EINVAL;
- save_access_regs(vcpu->run->s.regs.acrs);
+ if (vcpu->arch.acrs_loaded)
+ save_access_regs(vcpu->run->s.regs.acrs);
alet.val = vcpu->run->s.regs.acrs[ar];
if (ar == 0 || alet.val == 0) {
@@ -466,64 +312,53 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar,
return 0;
}
-struct trans_exc_code_bits {
- unsigned long addr : 52; /* Translation-exception Address */
- unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
- unsigned long : 2;
- unsigned long b56 : 1;
- unsigned long : 3;
- unsigned long b60 : 1;
- unsigned long b61 : 1;
- unsigned long as : 2; /* ASCE Identifier */
-};
-
-enum {
- FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
- FSI_STORE = 1, /* Exception was due to store operation */
- FSI_FETCH = 2 /* Exception was due to fetch operation */
-};
-
enum prot_type {
PROT_TYPE_LA = 0,
PROT_TYPE_KEYC = 1,
PROT_TYPE_ALC = 2,
PROT_TYPE_DAT = 3,
PROT_TYPE_IEP = 4,
+ /* Dummy value for passing an initialized value when code != PGM_PROTECTION */
+ PROT_TYPE_DUMMY,
};
static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar,
enum gacc_mode mode, enum prot_type prot, bool terminate)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- struct trans_exc_code_bits *tec;
+ union teid *teid;
memset(pgm, 0, sizeof(*pgm));
pgm->code = code;
- tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ teid = (union teid *)&pgm->trans_exc_code;
switch (code) {
case PGM_PROTECTION:
switch (prot) {
+ case PROT_TYPE_DUMMY:
+ /* We should never get here, acts like termination */
+ WARN_ON_ONCE(1);
+ break;
case PROT_TYPE_IEP:
- tec->b61 = 1;
+ teid->b61 = 1;
fallthrough;
case PROT_TYPE_LA:
- tec->b56 = 1;
+ teid->b56 = 1;
break;
case PROT_TYPE_KEYC:
- tec->b60 = 1;
+ teid->b60 = 1;
break;
case PROT_TYPE_ALC:
- tec->b60 = 1;
+ teid->b60 = 1;
fallthrough;
case PROT_TYPE_DAT:
- tec->b61 = 1;
+ teid->b61 = 1;
break;
}
if (terminate) {
- tec->b56 = 0;
- tec->b60 = 0;
- tec->b61 = 0;
+ teid->b56 = 0;
+ teid->b60 = 0;
+ teid->b61 = 0;
}
fallthrough;
case PGM_ASCE_TYPE:
@@ -537,9 +372,9 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva,
* exc_access_id has to be set to 0 for some instructions. Both
* cases have to be handled by the caller.
*/
- tec->addr = gva >> PAGE_SHIFT;
- tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
- tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ teid->addr = gva >> PAGE_SHIFT;
+ teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH;
+ teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
fallthrough;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
@@ -619,7 +454,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* Returns: - zero on success; @gpa contains the resulting absolute address
* - a negative value if guest access failed due to e.g. broken
* guest mapping
- * - a positve value if an access exception happened. In this case
+ * - a positive value if an access exception happened. In this case
* the returned value is the program interruption code as defined
* by the architecture
*/
@@ -642,7 +477,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130);
if (asce.r)
goto real_address;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
switch (asce.dt) {
case ASCE_TYPE_REGION1:
if (vaddr.rfx01 > asce.tl)
@@ -675,7 +510,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
case ASCE_TYPE_REGION1: {
union region1_table_entry rfte;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rfte.val))
return -EFAULT;
@@ -693,7 +528,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rste.val))
return -EFAULT;
@@ -711,7 +546,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &rtte.val))
return -EFAULT;
@@ -739,7 +574,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &ste.val))
return -EFAULT;
@@ -759,7 +594,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8;
}
}
- if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr))
return PGM_ADDRESSING;
if (deref_table(vcpu->kvm, ptr, &pte.val))
return -EFAULT;
@@ -781,7 +616,7 @@ absolute_address:
*prot = PROT_TYPE_IEP;
return PGM_PROTECTION;
}
- if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr))
return PGM_ADDRESSING;
*gpa = raddr.addr;
return 0;
@@ -968,8 +803,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
return rc;
} else {
gpa = kvm_s390_real_to_abs(vcpu, ga);
- if (kvm_is_error_gpa(vcpu->kvm, gpa))
+ if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) {
rc = PGM_ADDRESSING;
+ prot = PROT_TYPE_DUMMY;
+ }
}
if (rc)
return trans_exc(vcpu, rc, ga, ar, mode, prot);
@@ -993,6 +830,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
const gfn_t gfn = gpa_to_gfn(gpa);
int rc;
+ if (!gfn_to_memslot(kvm, gfn))
+ return PGM_ADDRESSING;
if (mode == GACC_STORE)
rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
else
@@ -1086,7 +925,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
try_storage_prot_override = storage_prot_override_applicable(vcpu);
need_ipte_lock = psw_bits(*psw).dat && !asce.r;
if (need_ipte_lock)
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
/*
* Since we do the access further down ultimately via a move instruction
* that does key checking and returns an error in case of a protection
@@ -1112,8 +951,6 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc == PGM_PROTECTION && try_storage_prot_override)
rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx],
data, fragment_len, PAGE_SPO_ACC);
- if (rc == PGM_PROTECTION)
- prot = PROT_TYPE_KEYC;
if (rc)
break;
len -= fragment_len;
@@ -1123,11 +960,15 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
if (rc > 0) {
bool terminate = (mode == GACC_STORE) && (idx > 0);
+ if (rc == PGM_PROTECTION)
+ prot = PROT_TYPE_KEYC;
+ else
+ prot = PROT_TYPE_DUMMY;
rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate);
}
out_unlock:
if (need_ipte_lock)
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
if (nr_pages > ARRAY_SIZE(gpa_array))
vfree(gpas);
return rc;
@@ -1148,10 +989,121 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
gra += fragment_len;
data += fragment_len;
}
+ if (rc > 0)
+ vcpu->arch.pgm.code = rc;
return rc;
}
/**
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ * non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ * the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ * *@old_addr contains the value at @gpa before the attempt to
+ * exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old.
+ * Honors storage keys.
+ *
+ * Return: * 0: successful exchange
+ * * >0: a program interruption code indicating the reason cmpxchg could
+ * not be attempted
+ * * -EINVAL: address misaligned or len not power of two
+ * * -EAGAIN: transient failure (len 1 or 2)
+ * * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+ __uint128_t *old_addr, __uint128_t new,
+ u8 access_key, bool *success)
+{
+ gfn_t gfn = gpa_to_gfn(gpa);
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ bool writable;
+ hva_t hva;
+ int ret;
+
+ if (!IS_ALIGNED(gpa, len))
+ return -EINVAL;
+
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ /*
+ * Check if it's a read-only memslot, even though that cannot occur
+ * since those are unsupported.
+ * Don't try to actually handle that case.
+ */
+ if (!writable)
+ return -EOPNOTSUPP;
+
+ hva += offset_in_page(gpa);
+ /*
+ * The cmpxchg_user_key macro depends on the type of "old", so we need
+ * a case for each valid length and get some code duplication as long
+ * as we don't introduce a new macro.
+ */
+ switch (len) {
+ case 1: {
+ u8 old;
+
+ ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 2: {
+ u16 old;
+
+ ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 4: {
+ u32 old;
+
+ ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 8: {
+ u64 old;
+
+ ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 16: {
+ __uint128_t old;
+
+ ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ if (*success)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ /*
+ * Assume that the fault is caused by protection, either key protection
+ * or user page write protection.
+ */
+ if (ret == -EFAULT)
+ ret = PGM_PROTECTION;
+ return ret;
+}
+
+/**
* guest_translate_address_with_key - translate guest logical into guest absolute address
* @vcpu: virtual cpu
* @gva: Guest virtual address
@@ -1199,10 +1151,10 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar,
rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode,
access_key);
- ipte_unlock(vcpu);
+ ipte_unlock(vcpu->kvm);
return rc;
}
@@ -1263,6 +1215,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
unsigned long *pgt, int *dat_protection,
int *fake)
{
+ struct kvm *kvm;
struct gmap *parent;
union asce asce;
union vaddress vaddr;
@@ -1271,10 +1224,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
*fake = 0;
*dat_protection = 0;
+ kvm = sg->private;
parent = sg->parent;
vaddr.addr = saddr;
asce.val = sg->orig_asce;
- ptr = asce.origin * PAGE_SIZE;
+ ptr = asce.rsto * PAGE_SIZE;
if (asce.r) {
*fake = 1;
ptr = 0;
@@ -1331,6 +1285,7 @@ shadow_r2t:
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r1_entry++;
}
fallthrough;
case ASCE_TYPE_REGION2: {
@@ -1359,6 +1314,7 @@ shadow_r3t:
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r2_entry++;
}
fallthrough;
case ASCE_TYPE_REGION3: {
@@ -1396,6 +1352,7 @@ shadow_sgt:
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_r3_entry++;
}
fallthrough;
case ASCE_TYPE_SEGMENT: {
@@ -1429,6 +1386,7 @@ shadow_pgt:
rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
if (rc)
return rc;
+ kvm->stat.gmap_shadow_sg_entry++;
}
}
/* Return the parent address of the page table */
@@ -1437,6 +1395,44 @@ shadow_pgt:
}
/**
+ * shadow_pgt_lookup() - find a shadow page table
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: the address in the shadow aguest address space
+ * @pgt: parent gmap address of the page table to get shadowed
+ * @dat_protection: if the pgtable is marked as protected by dat
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ *
+ * Returns 0 if the shadow page table was found and -EAGAIN if the page
+ * table was not found.
+ *
+ * Called with sg->mm->mmap_lock in read.
+ */
+static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt,
+ int *dat_protection, int *fake)
+{
+ unsigned long pt_index;
+ unsigned long *table;
+ struct page *page;
+ int rc;
+
+ spin_lock(&sg->guest_table_lock);
+ table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
+ if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
+ /* Shadow page tables are full pages (pte+pgste) */
+ page = pfn_to_page(*table >> PAGE_SHIFT);
+ pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page));
+ *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE;
+ *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
+ *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE);
+ rc = 0;
+ } else {
+ rc = -EAGAIN;
+ }
+ spin_unlock(&sg->guest_table_lock);
+ return rc;
+}
+
+/**
* kvm_s390_shadow_fault - handle fault on a shadow page table
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
@@ -1459,15 +1455,18 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
int dat_protection, fake;
int rc;
+ if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm))
+ return -EFAULT;
+
mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
* tables/pointers we read stay valid - unshadowing is however
* always possible - only guest_table_lock protects us.
*/
- ipte_lock(vcpu);
+ ipte_lock(vcpu->kvm);
- rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
if (rc)
rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
&fake);
@@ -1499,7 +1498,8 @@ shadow_page:
pte.p |= dat_protection;
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
- ipte_unlock(vcpu);
+ vcpu->kvm->stat.gmap_shadow_pg_entry++;
+ ipte_unlock(vcpu->kvm);
mmap_read_unlock(sg->mm);
return rc;
}