diff options
Diffstat (limited to 'arch/s390/kvm/gaccess.c')
-rw-r--r-- | arch/s390/kvm/gaccess.c | 518 |
1 files changed, 259 insertions, 259 deletions
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 227ed0009354..21c2e61fece4 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -11,169 +11,14 @@ #include <linux/err.h> #include <linux/pgtable.h> #include <linux/bitfield.h> - +#include <asm/access-regs.h> +#include <asm/fault.h> #include <asm/gmap.h> +#include <asm/dat-bits.h> #include "kvm-s390.h" #include "gaccess.h" -#include <asm/switch_to.h> - -union asce { - unsigned long val; - struct { - unsigned long origin : 52; /* Region- or Segment-Table Origin */ - unsigned long : 2; - unsigned long g : 1; /* Subspace Group Control */ - unsigned long p : 1; /* Private Space Control */ - unsigned long s : 1; /* Storage-Alteration-Event Control */ - unsigned long x : 1; /* Space-Switch-Event Control */ - unsigned long r : 1; /* Real-Space Control */ - unsigned long : 1; - unsigned long dt : 2; /* Designation-Type Control */ - unsigned long tl : 2; /* Region- or Segment-Table Length */ - }; -}; - -enum { - ASCE_TYPE_SEGMENT = 0, - ASCE_TYPE_REGION3 = 1, - ASCE_TYPE_REGION2 = 2, - ASCE_TYPE_REGION1 = 3 -}; -union region1_table_entry { - unsigned long val; - struct { - unsigned long rto: 52;/* Region-Table Origin */ - unsigned long : 2; - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 1; - unsigned long tf : 2; /* Region-Second-Table Offset */ - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long : 1; - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long tl : 2; /* Region-Second-Table Length */ - }; -}; - -union region2_table_entry { - unsigned long val; - struct { - unsigned long rto: 52;/* Region-Table Origin */ - unsigned long : 2; - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 1; - unsigned long tf : 2; /* Region-Third-Table Offset */ - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long : 1; - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long tl : 2; /* Region-Third-Table Length */ - }; -}; - -struct region3_table_entry_fc0 { - unsigned long sto: 52;/* Segment-Table Origin */ - unsigned long : 1; - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 1; - unsigned long tf : 2; /* Segment-Table Offset */ - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long cr : 1; /* Common-Region Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long tl : 2; /* Segment-Table Length */ -}; - -struct region3_table_entry_fc1 { - unsigned long rfaa : 33; /* Region-Frame Absolute Address */ - unsigned long : 14; - unsigned long av : 1; /* ACCF-Validity Control */ - unsigned long acc: 4; /* Access-Control Bits */ - unsigned long f : 1; /* Fetch-Protection Bit */ - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long iep: 1; /* Instruction-Execution-Protection */ - unsigned long : 2; - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long cr : 1; /* Common-Region Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; -}; - -union region3_table_entry { - unsigned long val; - struct region3_table_entry_fc0 fc0; - struct region3_table_entry_fc1 fc1; - struct { - unsigned long : 53; - unsigned long fc : 1; /* Format-Control */ - unsigned long : 4; - unsigned long i : 1; /* Region-Invalid Bit */ - unsigned long cr : 1; /* Common-Region Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; - }; -}; - -struct segment_entry_fc0 { - unsigned long pto: 53;/* Page-Table Origin */ - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long : 3; - unsigned long i : 1; /* Segment-Invalid Bit */ - unsigned long cs : 1; /* Common-Segment Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; -}; - -struct segment_entry_fc1 { - unsigned long sfaa : 44; /* Segment-Frame Absolute Address */ - unsigned long : 3; - unsigned long av : 1; /* ACCF-Validity Control */ - unsigned long acc: 4; /* Access-Control Bits */ - unsigned long f : 1; /* Fetch-Protection Bit */ - unsigned long fc : 1; /* Format-Control */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long iep: 1; /* Instruction-Execution-Protection */ - unsigned long : 2; - unsigned long i : 1; /* Segment-Invalid Bit */ - unsigned long cs : 1; /* Common-Segment Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; -}; - -union segment_table_entry { - unsigned long val; - struct segment_entry_fc0 fc0; - struct segment_entry_fc1 fc1; - struct { - unsigned long : 53; - unsigned long fc : 1; /* Format-Control */ - unsigned long : 4; - unsigned long i : 1; /* Segment-Invalid Bit */ - unsigned long cs : 1; /* Common-Segment Bit */ - unsigned long tt : 2; /* Table-Type Bits */ - unsigned long : 2; - }; -}; - -enum { - TABLE_TYPE_SEGMENT = 0, - TABLE_TYPE_REGION3 = 1, - TABLE_TYPE_REGION2 = 2, - TABLE_TYPE_REGION1 = 3 -}; - -union page_table_entry { - unsigned long val; - struct { - unsigned long pfra : 52; /* Page-Frame Real Address */ - unsigned long z : 1; /* Zero Bit */ - unsigned long i : 1; /* Page-Invalid Bit */ - unsigned long p : 1; /* DAT-Protection Bit */ - unsigned long iep: 1; /* Instruction-Execution-Protection */ - unsigned long : 8; - }; -}; +#define GMAP_SHADOW_FAKE_TABLE 1ULL /* * vaddress union in order to easily decode a virtual address into its @@ -262,119 +107,119 @@ struct aste { /* .. more fields there */ }; -int ipte_lock_held(struct kvm_vcpu *vcpu) +int ipte_lock_held(struct kvm *kvm) { - if (vcpu->arch.sie_block->eca & ECA_SII) { + if (sclp.has_siif) { int rc; - read_lock(&vcpu->kvm->arch.sca_lock); - rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; - read_unlock(&vcpu->kvm->arch.sca_lock); + read_lock(&kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(kvm)->kh != 0; + read_unlock(&kvm->arch.sca_lock); return rc; } - return vcpu->kvm->arch.ipte_lock_count != 0; + return kvm->arch.ipte_lock_count != 0; } -static void ipte_lock_simple(struct kvm_vcpu *vcpu) +static void ipte_lock_simple(struct kvm *kvm) { union ipte_control old, new, *ic; - mutex_lock(&vcpu->kvm->arch.ipte_mutex); - vcpu->kvm->arch.ipte_lock_count++; - if (vcpu->kvm->arch.ipte_lock_count > 1) + mutex_lock(&kvm->arch.ipte_mutex); + kvm->arch.ipte_lock_count++; + if (kvm->arch.ipte_lock_count > 1) goto out; retry: - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.k) { - read_unlock(&vcpu->kvm->arch.sca_lock); + read_unlock(&kvm->arch.sca_lock); cond_resched(); goto retry; } new = old; new.k = 1; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); + read_unlock(&kvm->arch.sca_lock); out: - mutex_unlock(&vcpu->kvm->arch.ipte_mutex); + mutex_unlock(&kvm->arch.ipte_mutex); } -static void ipte_unlock_simple(struct kvm_vcpu *vcpu) +static void ipte_unlock_simple(struct kvm *kvm) { union ipte_control old, new, *ic; - mutex_lock(&vcpu->kvm->arch.ipte_mutex); - vcpu->kvm->arch.ipte_lock_count--; - if (vcpu->kvm->arch.ipte_lock_count) + mutex_lock(&kvm->arch.ipte_mutex); + kvm->arch.ipte_lock_count--; + if (kvm->arch.ipte_lock_count) goto out; - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); - wake_up(&vcpu->kvm->arch.ipte_wq); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); + read_unlock(&kvm->arch.sca_lock); + wake_up(&kvm->arch.ipte_wq); out: - mutex_unlock(&vcpu->kvm->arch.ipte_mutex); + mutex_unlock(&kvm->arch.ipte_mutex); } -static void ipte_lock_siif(struct kvm_vcpu *vcpu) +static void ipte_lock_siif(struct kvm *kvm) { union ipte_control old, new, *ic; retry: - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); if (old.kg) { - read_unlock(&vcpu->kvm->arch.sca_lock); + read_unlock(&kvm->arch.sca_lock); cond_resched(); goto retry; } new = old; new.k = 1; new.kh++; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); + read_unlock(&kvm->arch.sca_lock); } -static void ipte_unlock_siif(struct kvm_vcpu *vcpu) +static void ipte_unlock_siif(struct kvm *kvm) { union ipte_control old, new, *ic; - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); + old = READ_ONCE(*ic); do { - old = READ_ONCE(*ic); new = old; new.kh--; if (!new.kh) new.k = 0; - } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); + } while (!try_cmpxchg(&ic->val, &old.val, new.val)); + read_unlock(&kvm->arch.sca_lock); if (!new.kh) - wake_up(&vcpu->kvm->arch.ipte_wq); + wake_up(&kvm->arch.ipte_wq); } -void ipte_lock(struct kvm_vcpu *vcpu) +void ipte_lock(struct kvm *kvm) { - if (vcpu->arch.sie_block->eca & ECA_SII) - ipte_lock_siif(vcpu); + if (sclp.has_siif) + ipte_lock_siif(kvm); else - ipte_lock_simple(vcpu); + ipte_lock_simple(kvm); } -void ipte_unlock(struct kvm_vcpu *vcpu) +void ipte_unlock(struct kvm *kvm) { - if (vcpu->arch.sie_block->eca & ECA_SII) - ipte_unlock_siif(vcpu); + if (sclp.has_siif) + ipte_unlock_siif(kvm); else - ipte_unlock_simple(vcpu); + ipte_unlock_simple(kvm); } static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, @@ -391,7 +236,8 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, if (ar >= NUM_ACRS) return -EINVAL; - save_access_regs(vcpu->run->s.regs.acrs); + if (vcpu->arch.acrs_loaded) + save_access_regs(vcpu->run->s.regs.acrs); alet.val = vcpu->run->s.regs.acrs[ar]; if (ar == 0 || alet.val == 0) { @@ -466,64 +312,53 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, return 0; } -struct trans_exc_code_bits { - unsigned long addr : 52; /* Translation-exception Address */ - unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ - unsigned long : 2; - unsigned long b56 : 1; - unsigned long : 3; - unsigned long b60 : 1; - unsigned long b61 : 1; - unsigned long as : 2; /* ASCE Identifier */ -}; - -enum { - FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ - FSI_STORE = 1, /* Exception was due to store operation */ - FSI_FETCH = 2 /* Exception was due to fetch operation */ -}; - enum prot_type { PROT_TYPE_LA = 0, PROT_TYPE_KEYC = 1, PROT_TYPE_ALC = 2, PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, + /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ + PROT_TYPE_DUMMY, }; static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, enum gacc_mode mode, enum prot_type prot, bool terminate) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - struct trans_exc_code_bits *tec; + union teid *teid; memset(pgm, 0, sizeof(*pgm)); pgm->code = code; - tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + teid = (union teid *)&pgm->trans_exc_code; switch (code) { case PGM_PROTECTION: switch (prot) { + case PROT_TYPE_DUMMY: + /* We should never get here, acts like termination */ + WARN_ON_ONCE(1); + break; case PROT_TYPE_IEP: - tec->b61 = 1; + teid->b61 = 1; fallthrough; case PROT_TYPE_LA: - tec->b56 = 1; + teid->b56 = 1; break; case PROT_TYPE_KEYC: - tec->b60 = 1; + teid->b60 = 1; break; case PROT_TYPE_ALC: - tec->b60 = 1; + teid->b60 = 1; fallthrough; case PROT_TYPE_DAT: - tec->b61 = 1; + teid->b61 = 1; break; } if (terminate) { - tec->b56 = 0; - tec->b60 = 0; - tec->b61 = 0; + teid->b56 = 0; + teid->b60 = 0; + teid->b61 = 0; } fallthrough; case PGM_ASCE_TYPE: @@ -537,9 +372,9 @@ static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, * exc_access_id has to be set to 0 for some instructions. Both * cases have to be handled by the caller. */ - tec->addr = gva >> PAGE_SHIFT; - tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; - tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + teid->addr = gva >> PAGE_SHIFT; + teid->fsi = mode == GACC_STORE ? TEID_FSI_STORE : TEID_FSI_FETCH; + teid->as = psw_bits(vcpu->arch.sie_block->gpsw).as; fallthrough; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: @@ -619,7 +454,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * Returns: - zero on success; @gpa contains the resulting absolute address * - a negative value if guest access failed due to e.g. broken * guest mapping - * - a positve value if an access exception happened. In this case + * - a positive value if an access exception happened. In this case * the returned value is the program interruption code as defined * by the architecture */ @@ -642,7 +477,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, iep = ctlreg0.iep && test_kvm_facility(vcpu->kvm, 130); if (asce.r) goto real_address; - ptr = asce.origin * PAGE_SIZE; + ptr = asce.rsto * PAGE_SIZE; switch (asce.dt) { case ASCE_TYPE_REGION1: if (vaddr.rfx01 > asce.tl) @@ -675,7 +510,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, case ASCE_TYPE_REGION1: { union region1_table_entry rfte; - if (kvm_is_error_gpa(vcpu->kvm, ptr)) + if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr)) return PGM_ADDRESSING; if (deref_table(vcpu->kvm, ptr, &rfte.val)) return -EFAULT; @@ -693,7 +528,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, case ASCE_TYPE_REGION2: { union region2_table_entry rste; - if (kvm_is_error_gpa(vcpu->kvm, ptr)) + if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr)) return PGM_ADDRESSING; if (deref_table(vcpu->kvm, ptr, &rste.val)) return -EFAULT; @@ -711,7 +546,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, case ASCE_TYPE_REGION3: { union region3_table_entry rtte; - if (kvm_is_error_gpa(vcpu->kvm, ptr)) + if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr)) return PGM_ADDRESSING; if (deref_table(vcpu->kvm, ptr, &rtte.val)) return -EFAULT; @@ -739,7 +574,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, case ASCE_TYPE_SEGMENT: { union segment_table_entry ste; - if (kvm_is_error_gpa(vcpu->kvm, ptr)) + if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr)) return PGM_ADDRESSING; if (deref_table(vcpu->kvm, ptr, &ste.val)) return -EFAULT; @@ -759,7 +594,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, ptr = ste.fc0.pto * (PAGE_SIZE / 2) + vaddr.px * 8; } } - if (kvm_is_error_gpa(vcpu->kvm, ptr)) + if (!kvm_is_gpa_in_memslot(vcpu->kvm, ptr)) return PGM_ADDRESSING; if (deref_table(vcpu->kvm, ptr, &pte.val)) return -EFAULT; @@ -781,7 +616,7 @@ absolute_address: *prot = PROT_TYPE_IEP; return PGM_PROTECTION; } - if (kvm_is_error_gpa(vcpu->kvm, raddr.addr)) + if (!kvm_is_gpa_in_memslot(vcpu->kvm, raddr.addr)) return PGM_ADDRESSING; *gpa = raddr.addr; return 0; @@ -968,8 +803,10 @@ static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, return rc; } else { gpa = kvm_s390_real_to_abs(vcpu, ga); - if (kvm_is_error_gpa(vcpu->kvm, gpa)) + if (!kvm_is_gpa_in_memslot(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; + prot = PROT_TYPE_DUMMY; + } } if (rc) return trans_exc(vcpu, rc, ga, ar, mode, prot); @@ -993,6 +830,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, const gfn_t gfn = gpa_to_gfn(gpa); int rc; + if (!gfn_to_memslot(kvm, gfn)) + return PGM_ADDRESSING; if (mode == GACC_STORE) rc = kvm_write_guest_page(kvm, gfn, data, offset, len); else @@ -1086,7 +925,7 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, try_storage_prot_override = storage_prot_override_applicable(vcpu); need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) - ipte_lock(vcpu); + ipte_lock(vcpu->kvm); /* * Since we do the access further down ultimately via a move instruction * that does key checking and returns an error in case of a protection @@ -1112,8 +951,6 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc == PGM_PROTECTION && try_storage_prot_override) rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], data, fragment_len, PAGE_SPO_ACC); - if (rc == PGM_PROTECTION) - prot = PROT_TYPE_KEYC; if (rc) break; len -= fragment_len; @@ -1123,11 +960,15 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, if (rc > 0) { bool terminate = (mode == GACC_STORE) && (idx > 0); + if (rc == PGM_PROTECTION) + prot = PROT_TYPE_KEYC; + else + prot = PROT_TYPE_DUMMY; rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } out_unlock: if (need_ipte_lock) - ipte_unlock(vcpu); + ipte_unlock(vcpu->kvm); if (nr_pages > ARRAY_SIZE(gpa_array)) vfree(gpas); return rc; @@ -1148,10 +989,121 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, gra += fragment_len; data += fragment_len; } + if (rc > 0) + vcpu->arch.pgm.code = rc; return rc; } /** + * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address. + * @kvm: Virtual machine instance. + * @gpa: Absolute guest address of the location to be changed. + * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a + * non power of two will result in failure. + * @old_addr: Pointer to old value. If the location at @gpa contains this value, + * the exchange will succeed. After calling cmpxchg_guest_abs_with_key() + * *@old_addr contains the value at @gpa before the attempt to + * exchange the value. + * @new: The value to place at @gpa. + * @access_key: The access key to use for the guest access. + * @success: output value indicating if an exchange occurred. + * + * Atomically exchange the value at @gpa by @new, if it contains *@old. + * Honors storage keys. + * + * Return: * 0: successful exchange + * * >0: a program interruption code indicating the reason cmpxchg could + * not be attempted + * * -EINVAL: address misaligned or len not power of two + * * -EAGAIN: transient failure (len 1 or 2) + * * -EOPNOTSUPP: read-only memslot (should never occur) + */ +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + __uint128_t *old_addr, __uint128_t new, + u8 access_key, bool *success) +{ + gfn_t gfn = gpa_to_gfn(gpa); + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + bool writable; + hva_t hva; + int ret; + + if (!IS_ALIGNED(gpa, len)) + return -EINVAL; + + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a read-only memslot, even though that cannot occur + * since those are unsupported. + * Don't try to actually handle that case. + */ + if (!writable) + return -EOPNOTSUPP; + + hva += offset_in_page(gpa); + /* + * The cmpxchg_user_key macro depends on the type of "old", so we need + * a case for each valid length and get some code duplication as long + * as we don't introduce a new macro. + */ + switch (len) { + case 1: { + u8 old; + + ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 2: { + u16 old; + + ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 4: { + u32 old; + + ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 8: { + u64 old; + + ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 16: { + __uint128_t old; + + ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + default: + return -EINVAL; + } + if (*success) + mark_page_dirty_in_slot(kvm, slot, gfn); + /* + * Assume that the fault is caused by protection, either key protection + * or user page write protection. + */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; + return ret; +} + +/** * guest_translate_address_with_key - translate guest logical into guest absolute address * @vcpu: virtual cpu * @gva: Guest virtual address @@ -1199,10 +1151,10 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); if (rc) return rc; - ipte_lock(vcpu); + ipte_lock(vcpu->kvm); rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode, access_key); - ipte_unlock(vcpu); + ipte_unlock(vcpu->kvm); return rc; } @@ -1263,6 +1215,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, unsigned long *pgt, int *dat_protection, int *fake) { + struct kvm *kvm; struct gmap *parent; union asce asce; union vaddress vaddr; @@ -1271,10 +1224,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, *fake = 0; *dat_protection = 0; + kvm = sg->private; parent = sg->parent; vaddr.addr = saddr; asce.val = sg->orig_asce; - ptr = asce.origin * PAGE_SIZE; + ptr = asce.rsto * PAGE_SIZE; if (asce.r) { *fake = 1; ptr = 0; @@ -1331,6 +1285,7 @@ shadow_r2t: rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r1_entry++; } fallthrough; case ASCE_TYPE_REGION2: { @@ -1359,6 +1314,7 @@ shadow_r3t: rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r2_entry++; } fallthrough; case ASCE_TYPE_REGION3: { @@ -1396,6 +1352,7 @@ shadow_sgt: rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_r3_entry++; } fallthrough; case ASCE_TYPE_SEGMENT: { @@ -1429,6 +1386,7 @@ shadow_pgt: rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); if (rc) return rc; + kvm->stat.gmap_shadow_sg_entry++; } } /* Return the parent address of the page table */ @@ -1437,6 +1395,44 @@ shadow_pgt: } /** + * shadow_pgt_lookup() - find a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: the address in the shadow aguest address space + * @pgt: parent gmap address of the page table to get shadowed + * @dat_protection: if the pgtable is marked as protected by dat + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if the shadow page table was found and -EAGAIN if the page + * table was not found. + * + * Called with sg->mm->mmap_lock in read. + */ +static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, + int *dat_protection, int *fake) +{ + unsigned long pt_index; + unsigned long *table; + struct page *page; + int rc; + + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { + /* Shadow page tables are full pages (pte+pgste) */ + page = pfn_to_page(*table >> PAGE_SHIFT); + pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page)); + *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE; + *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); + *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE); + rc = 0; + } else { + rc = -EAGAIN; + } + spin_unlock(&sg->guest_table_lock); + return rc; +} + +/** * kvm_s390_shadow_fault - handle fault on a shadow page table * @vcpu: virtual cpu * @sg: pointer to the shadow guest address space structure @@ -1459,15 +1455,18 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; + if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm)) + return -EFAULT; + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent * tables/pointers we read stay valid - unshadowing is however * always possible - only guest_table_lock protects us. */ - ipte_lock(vcpu); + ipte_lock(vcpu->kvm); - rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); if (rc) rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, &fake); @@ -1499,7 +1498,8 @@ shadow_page: pte.p |= dat_protection; if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); - ipte_unlock(vcpu); + vcpu->kvm->stat.gmap_shadow_pg_entry++; + ipte_unlock(vcpu->kvm); mmap_read_unlock(sg->mm); return rc; } |