diff options
Diffstat (limited to 'arch/s390/kvm/gaccess.c')
-rw-r--r-- | arch/s390/kvm/gaccess.c | 589 |
1 files changed, 450 insertions, 139 deletions
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 07d30ffcfa41..0243b6e38d36 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -9,8 +9,9 @@ #include <linux/vmalloc.h> #include <linux/mm_types.h> #include <linux/err.h> +#include <linux/pgtable.h> +#include <linux/bitfield.h> -#include <asm/pgtable.h> #include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" @@ -261,77 +262,77 @@ struct aste { /* .. more fields there */ }; -int ipte_lock_held(struct kvm_vcpu *vcpu) +int ipte_lock_held(struct kvm *kvm) { - if (vcpu->arch.sie_block->eca & ECA_SII) { + if (sclp.has_siif) { int rc; - read_lock(&vcpu->kvm->arch.sca_lock); - rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; - read_unlock(&vcpu->kvm->arch.sca_lock); + read_lock(&kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(kvm)->kh != 0; + read_unlock(&kvm->arch.sca_lock); return rc; } - return vcpu->kvm->arch.ipte_lock_count != 0; + return kvm->arch.ipte_lock_count != 0; } -static void ipte_lock_simple(struct kvm_vcpu *vcpu) +static void ipte_lock_simple(struct kvm *kvm) { union ipte_control old, new, *ic; - mutex_lock(&vcpu->kvm->arch.ipte_mutex); - vcpu->kvm->arch.ipte_lock_count++; - if (vcpu->kvm->arch.ipte_lock_count > 1) + mutex_lock(&kvm->arch.ipte_mutex); + kvm->arch.ipte_lock_count++; + if (kvm->arch.ipte_lock_count > 1) goto out; retry: - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); do { old = READ_ONCE(*ic); if (old.k) { - read_unlock(&vcpu->kvm->arch.sca_lock); + read_unlock(&kvm->arch.sca_lock); cond_resched(); goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); + read_unlock(&kvm->arch.sca_lock); out: - mutex_unlock(&vcpu->kvm->arch.ipte_mutex); + mutex_unlock(&kvm->arch.ipte_mutex); } -static void ipte_unlock_simple(struct kvm_vcpu *vcpu) +static void ipte_unlock_simple(struct kvm *kvm) { union ipte_control old, new, *ic; - mutex_lock(&vcpu->kvm->arch.ipte_mutex); - vcpu->kvm->arch.ipte_lock_count--; - if (vcpu->kvm->arch.ipte_lock_count) + mutex_lock(&kvm->arch.ipte_mutex); + kvm->arch.ipte_lock_count--; + if (kvm->arch.ipte_lock_count) goto out; - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); - wake_up(&vcpu->kvm->arch.ipte_wq); + read_unlock(&kvm->arch.sca_lock); + wake_up(&kvm->arch.ipte_wq); out: - mutex_unlock(&vcpu->kvm->arch.ipte_mutex); + mutex_unlock(&kvm->arch.ipte_mutex); } -static void ipte_lock_siif(struct kvm_vcpu *vcpu) +static void ipte_lock_siif(struct kvm *kvm) { union ipte_control old, new, *ic; retry: - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); do { old = READ_ONCE(*ic); if (old.kg) { - read_unlock(&vcpu->kvm->arch.sca_lock); + read_unlock(&kvm->arch.sca_lock); cond_resched(); goto retry; } @@ -339,15 +340,15 @@ retry: new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); + read_unlock(&kvm->arch.sca_lock); } -static void ipte_unlock_siif(struct kvm_vcpu *vcpu) +static void ipte_unlock_siif(struct kvm *kvm) { union ipte_control old, new, *ic; - read_lock(&vcpu->kvm->arch.sca_lock); - ic = kvm_s390_get_ipte_control(vcpu->kvm); + read_lock(&kvm->arch.sca_lock); + ic = kvm_s390_get_ipte_control(kvm); do { old = READ_ONCE(*ic); new = old; @@ -355,25 +356,25 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); - read_unlock(&vcpu->kvm->arch.sca_lock); + read_unlock(&kvm->arch.sca_lock); if (!new.kh) - wake_up(&vcpu->kvm->arch.ipte_wq); + wake_up(&kvm->arch.ipte_wq); } -void ipte_lock(struct kvm_vcpu *vcpu) +void ipte_lock(struct kvm *kvm) { - if (vcpu->arch.sie_block->eca & ECA_SII) - ipte_lock_siif(vcpu); + if (sclp.has_siif) + ipte_lock_siif(kvm); else - ipte_lock_simple(vcpu); + ipte_lock_simple(kvm); } -void ipte_unlock(struct kvm_vcpu *vcpu) +void ipte_unlock(struct kvm *kvm) { - if (vcpu->arch.sie_block->eca & ECA_SII) - ipte_unlock_siif(vcpu); + if (sclp.has_siif) + ipte_unlock_siif(kvm); else - ipte_unlock_simple(vcpu); + ipte_unlock_simple(kvm); } static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, @@ -488,10 +489,12 @@ enum prot_type { PROT_TYPE_ALC = 2, PROT_TYPE_DAT = 3, PROT_TYPE_IEP = 4, + /* Dummy value for passing an initialized value when code != PGM_PROTECTION */ + PROT_NONE, }; -static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, - u8 ar, enum gacc_mode mode, enum prot_type prot) +static int trans_exc_ending(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, + enum gacc_mode mode, enum prot_type prot, bool terminate) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; struct trans_exc_code_bits *tec; @@ -503,9 +506,13 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { + case PROT_NONE: + /* We should never get here, acts like termination */ + WARN_ON_ONCE(1); + break; case PROT_TYPE_IEP: tec->b61 = 1; - /* FALL THROUGH */ + fallthrough; case PROT_TYPE_LA: tec->b56 = 1; break; @@ -514,12 +521,17 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, break; case PROT_TYPE_ALC: tec->b60 = 1; - /* FALL THROUGH */ + fallthrough; case PROT_TYPE_DAT: tec->b61 = 1; break; } - /* FALL THROUGH */ + if (terminate) { + tec->b56 = 0; + tec->b60 = 0; + tec->b61 = 0; + } + fallthrough; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: case PGM_REGION_FIRST_TRANS: @@ -534,7 +546,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, tec->addr = gva >> PAGE_SHIFT; tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; - /* FALL THROUGH */ + fallthrough; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: case PGM_ASTE_VALIDITY: @@ -551,6 +563,12 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, return code; } +static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, u8 ar, + enum gacc_mode mode, enum prot_type prot) +{ + return trans_exc_ending(vcpu, code, gva, ar, mode, prot, false); +} + static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, unsigned long ga, u8 ar, enum gacc_mode mode) { @@ -677,7 +695,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, dat_protection |= rfte.p; ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8; } - /* fallthrough */ + fallthrough; case ASCE_TYPE_REGION2: { union region2_table_entry rste; @@ -695,7 +713,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, dat_protection |= rste.p; ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8; } - /* fallthrough */ + fallthrough; case ASCE_TYPE_REGION3: { union region3_table_entry rtte; @@ -723,7 +741,7 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, dat_protection |= rtte.fc0.p; ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8; } - /* fallthrough */ + fallthrough; case ASCE_TYPE_SEGMENT: { union segment_table_entry ste; @@ -794,48 +812,270 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, - unsigned long *pages, unsigned long nr_pages, - const union asce asce, enum gacc_mode mode) +static int vm_check_access_key(struct kvm *kvm, u8 access_key, + enum gacc_mode mode, gpa_t gpa) +{ + u8 storage_key, access_control; + bool fetch_protected; + unsigned long hva; + int r; + + if (access_key == 0) + return 0; + + hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + + mmap_read_lock(current->mm); + r = get_guest_storage_key(current->mm, hva, &storage_key); + mmap_read_unlock(current->mm); + if (r) + return r; + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); + if (access_control == access_key) + return 0; + fetch_protected = storage_key & _PAGE_FP_BIT; + if ((mode == GACC_FETCH || mode == GACC_IFETCH) && !fetch_protected) + return 0; + return PGM_PROTECTION; +} + +static bool fetch_prot_override_applicable(struct kvm_vcpu *vcpu, enum gacc_mode mode, + union asce asce) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned long override; + + if (mode == GACC_FETCH || mode == GACC_IFETCH) { + /* check if fetch protection override enabled */ + override = vcpu->arch.sie_block->gcr[0]; + override &= CR0_FETCH_PROTECTION_OVERRIDE; + /* not applicable if subject to DAT && private space */ + override = override && !(psw_bits(*psw).dat && asce.p); + return override; + } + return false; +} + +static bool fetch_prot_override_applies(unsigned long ga, unsigned int len) +{ + return ga < 2048 && ga + len <= 2048; +} + +static bool storage_prot_override_applicable(struct kvm_vcpu *vcpu) +{ + /* check if storage protection override enabled */ + return vcpu->arch.sie_block->gcr[0] & CR0_STORAGE_PROTECTION_OVERRIDE; +} + +static bool storage_prot_override_applies(u8 access_control) +{ + /* matches special storage protection override key (9) -> allow */ + return access_control == PAGE_SPO_ACC; +} + +static int vcpu_check_access_key(struct kvm_vcpu *vcpu, u8 access_key, + enum gacc_mode mode, union asce asce, gpa_t gpa, + unsigned long ga, unsigned int len) +{ + u8 storage_key, access_control; + unsigned long hva; + int r; + + /* access key 0 matches any storage key -> allow */ + if (access_key == 0) + return 0; + /* + * caller needs to ensure that gfn is accessible, so we can + * assume that this cannot fail + */ + hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(gpa)); + mmap_read_lock(current->mm); + r = get_guest_storage_key(current->mm, hva, &storage_key); + mmap_read_unlock(current->mm); + if (r) + return r; + access_control = FIELD_GET(_PAGE_ACC_BITS, storage_key); + /* access key matches storage key -> allow */ + if (access_control == access_key) + return 0; + if (mode == GACC_FETCH || mode == GACC_IFETCH) { + /* it is a fetch and fetch protection is off -> allow */ + if (!(storage_key & _PAGE_FP_BIT)) + return 0; + if (fetch_prot_override_applicable(vcpu, mode, asce) && + fetch_prot_override_applies(ga, len)) + return 0; + } + if (storage_prot_override_applicable(vcpu) && + storage_prot_override_applies(access_control)) + return 0; + return PGM_PROTECTION; +} + +/** + * guest_range_to_gpas() - Calculate guest physical addresses of page fragments + * covering a logical range + * @vcpu: virtual cpu + * @ga: guest address, start of range + * @ar: access register + * @gpas: output argument, may be NULL + * @len: length of range in bytes + * @asce: address-space-control element to use for translation + * @mode: access mode + * @access_key: access key to mach the range's storage keys against + * + * Translate a logical range to a series of guest absolute addresses, + * such that the concatenation of page fragments starting at each gpa make up + * the whole range. + * The translation is performed as if done by the cpu for the given @asce, @ar, + * @mode and state of the @vcpu. + * If the translation causes an exception, its program interruption code is + * returned and the &struct kvm_s390_pgm_info pgm member of @vcpu is modified + * such that a subsequent call to kvm_s390_inject_prog_vcpu() will inject + * a correct exception into the guest. + * The resulting gpas are stored into @gpas, unless it is NULL. + * + * Note: All fragments except the first one start at the beginning of a page. + * When deriving the boundaries of a fragment from a gpa, all but the last + * fragment end at the end of the page. + * + * Return: + * * 0 - success + * * <0 - translation could not be performed, for example if guest + * memory could not be accessed + * * >0 - an access exception occurred. In this case the returned value + * is the program interruption code and the contents of pgm may + * be used to inject an exception into the guest. + */ +static int guest_range_to_gpas(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + unsigned long *gpas, unsigned long len, + const union asce asce, enum gacc_mode mode, + u8 access_key) { psw_t *psw = &vcpu->arch.sie_block->gpsw; + unsigned int offset = offset_in_page(ga); + unsigned int fragment_len; int lap_enabled, rc = 0; enum prot_type prot; + unsigned long gpa; lap_enabled = low_address_protection_enabled(vcpu, asce); - while (nr_pages) { + while (min(PAGE_SIZE - offset, len) > 0) { + fragment_len = min(PAGE_SIZE - offset, len); ga = kvm_s390_logical_to_effective(vcpu, ga); if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, PROT_TYPE_LA); - ga &= PAGE_MASK; if (psw_bits(*psw).dat) { - rc = guest_translate(vcpu, ga, pages, asce, mode, &prot); + rc = guest_translate(vcpu, ga, &gpa, asce, mode, &prot); if (rc < 0) return rc; } else { - *pages = kvm_s390_real_to_abs(vcpu, ga); - if (kvm_is_error_gpa(vcpu->kvm, *pages)) + gpa = kvm_s390_real_to_abs(vcpu, ga); + if (kvm_is_error_gpa(vcpu->kvm, gpa)) { rc = PGM_ADDRESSING; + prot = PROT_NONE; + } } if (rc) return trans_exc(vcpu, rc, ga, ar, mode, prot); - ga += PAGE_SIZE; - pages++; - nr_pages--; + rc = vcpu_check_access_key(vcpu, access_key, mode, asce, gpa, ga, + fragment_len); + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_KEYC); + if (gpas) + *gpas++ = gpa; + offset = 0; + ga += fragment_len; + len -= fragment_len; } return 0; } -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, - unsigned long len, enum gacc_mode mode) +static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, + void *data, unsigned int len) +{ + const unsigned int offset = offset_in_page(gpa); + const gfn_t gfn = gpa_to_gfn(gpa); + int rc; + + if (mode == GACC_STORE) + rc = kvm_write_guest_page(kvm, gfn, data, offset, len); + else + rc = kvm_read_guest_page(kvm, gfn, data, offset, len); + return rc; +} + +static int +access_guest_page_with_key(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa, + void *data, unsigned int len, u8 access_key) +{ + struct kvm_memory_slot *slot; + bool writable; + gfn_t gfn; + hva_t hva; + int rc; + + gfn = gpa >> PAGE_SHIFT; + slot = gfn_to_memslot(kvm, gfn); + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a ro memslot, even tho that can't occur (they're unsupported). + * Don't try to actually handle that case. + */ + if (!writable && mode == GACC_STORE) + return -EOPNOTSUPP; + hva += offset_in_page(gpa); + if (mode == GACC_STORE) + rc = copy_to_user_key((void __user *)hva, data, len, access_key); + else + rc = copy_from_user_key(data, (void __user *)hva, len, access_key); + if (rc) + return PGM_PROTECTION; + if (mode == GACC_STORE) + mark_page_dirty_in_slot(kvm, slot, gfn); + return 0; +} + +int access_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, void *data, + unsigned long len, enum gacc_mode mode, u8 access_key) +{ + int offset = offset_in_page(gpa); + int fragment_len; + int rc; + + while (min(PAGE_SIZE - offset, len) > 0) { + fragment_len = min(PAGE_SIZE - offset, len); + rc = access_guest_page_with_key(kvm, mode, gpa, data, fragment_len, access_key); + if (rc) + return rc; + offset = 0; + len -= fragment_len; + data += fragment_len; + gpa += fragment_len; + } + return 0; +} + +int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, + void *data, unsigned long len, enum gacc_mode mode, + u8 access_key) { psw_t *psw = &vcpu->arch.sie_block->gpsw; - unsigned long _len, nr_pages, gpa, idx; - unsigned long pages_array[2]; - unsigned long *pages; + unsigned long nr_pages, idx; + unsigned long gpa_array[2]; + unsigned int fragment_len; + unsigned long *gpas; + enum prot_type prot; int need_ipte_lock; union asce asce; + bool try_storage_prot_override; + bool try_fetch_prot_override; int rc; if (!len) @@ -845,55 +1085,90 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; - pages = pages_array; - if (nr_pages > ARRAY_SIZE(pages_array)) - pages = vmalloc(array_size(nr_pages, sizeof(unsigned long))); - if (!pages) + gpas = gpa_array; + if (nr_pages > ARRAY_SIZE(gpa_array)) + gpas = vmalloc(array_size(nr_pages, sizeof(unsigned long))); + if (!gpas) return -ENOMEM; + try_fetch_prot_override = fetch_prot_override_applicable(vcpu, mode, asce); + try_storage_prot_override = storage_prot_override_applicable(vcpu); need_ipte_lock = psw_bits(*psw).dat && !asce.r; if (need_ipte_lock) - ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); - for (idx = 0; idx < nr_pages && !rc; idx++) { - gpa = *(pages + idx) + (ga & ~PAGE_MASK); - _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (mode == GACC_STORE) - rc = kvm_write_guest(vcpu->kvm, gpa, data, _len); + ipte_lock(vcpu->kvm); + /* + * Since we do the access further down ultimately via a move instruction + * that does key checking and returns an error in case of a protection + * violation, we don't need to do the check during address translation. + * Skip it by passing access key 0, which matches any storage key, + * obviating the need for any further checks. As a result the check is + * handled entirely in hardware on access, we only need to take care to + * forego key protection checking if fetch protection override applies or + * retry with the special key 9 in case of storage protection override. + */ + rc = guest_range_to_gpas(vcpu, ga, ar, gpas, len, asce, mode, 0); + if (rc) + goto out_unlock; + for (idx = 0; idx < nr_pages; idx++) { + fragment_len = min(PAGE_SIZE - offset_in_page(gpas[idx]), len); + if (try_fetch_prot_override && fetch_prot_override_applies(ga, fragment_len)) { + rc = access_guest_page(vcpu->kvm, mode, gpas[idx], + data, fragment_len); + } else { + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], + data, fragment_len, access_key); + } + if (rc == PGM_PROTECTION && try_storage_prot_override) + rc = access_guest_page_with_key(vcpu->kvm, mode, gpas[idx], + data, fragment_len, PAGE_SPO_ACC); + if (rc) + break; + len -= fragment_len; + data += fragment_len; + ga = kvm_s390_logical_to_effective(vcpu, ga + fragment_len); + } + if (rc > 0) { + bool terminate = (mode == GACC_STORE) && (idx > 0); + + if (rc == PGM_PROTECTION) + prot = PROT_TYPE_KEYC; else - rc = kvm_read_guest(vcpu->kvm, gpa, data, _len); - len -= _len; - ga += _len; - data += _len; + prot = PROT_NONE; + rc = trans_exc_ending(vcpu, rc, ga, ar, mode, prot, terminate); } +out_unlock: if (need_ipte_lock) - ipte_unlock(vcpu); - if (nr_pages > ARRAY_SIZE(pages_array)) - vfree(pages); + ipte_unlock(vcpu->kvm); + if (nr_pages > ARRAY_SIZE(gpa_array)) + vfree(gpas); return rc; } int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode) { - unsigned long _len, gpa; + unsigned int fragment_len; + unsigned long gpa; int rc = 0; while (len && !rc) { gpa = kvm_s390_real_to_abs(vcpu, gra); - _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); - if (mode) - rc = write_guest_abs(vcpu, gpa, data, _len); - else - rc = read_guest_abs(vcpu, gpa, data, _len); - len -= _len; - gra += _len; - data += _len; + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), len); + rc = access_guest_page(vcpu->kvm, mode, gpa, data, fragment_len); + len -= fragment_len; + gra += fragment_len; + data += fragment_len; } return rc; } /** - * guest_translate_address - translate guest logical into guest absolute address + * guest_translate_address_with_key - translate guest logical into guest absolute address + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @gpa: Guest physical address + * @mode: Translation access mode + * @access_key: access key to mach the storage key with * * Parameter semantics are the same as the ones from guest_translate. * The memory contents at the guest address are not changed. @@ -901,11 +1176,10 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * Note: The IPTE lock is not taken during this function, so the caller * has to take care of this. */ -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long *gpa, enum gacc_mode mode) +int guest_translate_address_with_key(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long *gpa, enum gacc_mode mode, + u8 access_key) { - psw_t *psw = &vcpu->arch.sie_block->gpsw; - enum prot_type prot; union asce asce; int rc; @@ -913,49 +1187,62 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); if (rc) return rc; - if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (mode == GACC_STORE) - return trans_exc(vcpu, PGM_PROTECTION, gva, 0, - mode, PROT_TYPE_LA); - } + return guest_range_to_gpas(vcpu, gva, ar, gpa, 1, asce, mode, + access_key); +} - if (psw_bits(*psw).dat && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, asce, mode, &prot); - if (rc > 0) - return trans_exc(vcpu, rc, gva, 0, mode, prot); - } else { - *gpa = kvm_s390_real_to_abs(vcpu, gva); - if (kvm_is_error_gpa(vcpu->kvm, *gpa)) - return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); - } +/** + * check_gva_range - test a range of guest virtual addresses for accessibility + * @vcpu: virtual cpu + * @gva: Guest virtual address + * @ar: Access register + * @length: Length of test range + * @mode: Translation access mode + * @access_key: access key to mach the storage keys with + */ +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, + unsigned long length, enum gacc_mode mode, u8 access_key) +{ + union asce asce; + int rc = 0; + + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); + if (rc) + return rc; + ipte_lock(vcpu->kvm); + rc = guest_range_to_gpas(vcpu, gva, ar, NULL, length, asce, mode, + access_key); + ipte_unlock(vcpu->kvm); return rc; } /** - * check_gva_range - test a range of guest virtual addresses for accessibility + * check_gpa_range - test a range of guest physical addresses for accessibility + * @kvm: virtual machine instance + * @gpa: guest physical address + * @length: length of test range + * @mode: access mode to test, relevant for storage keys + * @access_key: access key to mach the storage keys with */ -int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, - unsigned long length, enum gacc_mode mode) +int check_gpa_range(struct kvm *kvm, unsigned long gpa, unsigned long length, + enum gacc_mode mode, u8 access_key) { - unsigned long gpa; - unsigned long currlen; + unsigned int fragment_len; int rc = 0; - ipte_lock(vcpu); - while (length > 0 && !rc) { - currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); - rc = guest_translate_address(vcpu, gva, ar, &gpa, mode); - gva += currlen; - length -= currlen; + while (length && !rc) { + fragment_len = min(PAGE_SIZE - offset_in_page(gpa), length); + rc = vm_check_access_key(kvm, access_key, mode, gpa); + length -= fragment_len; + gpa += fragment_len; } - ipte_unlock(vcpu); - return rc; } /** * kvm_s390_check_low_addr_prot_real - check for low-address protection + * @vcpu: virtual cpu * @gra: Guest real address * * Checks whether an address is subject to low-address protection and set @@ -976,7 +1263,10 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) * kvm_s390_shadow_tables - walk the guest page table and create shadow tables * @sg: pointer to the shadow guest address space structure * @saddr: faulting address in the shadow gmap - * @pgt: pointer to the page table address result + * @pgt: pointer to the beginning of the page table for the given address if + * successful (return value 0), or to the first invalid DAT entry in + * case of exceptions (return value > 0) + * @dat_protection: referenced memory is write protected * @fake: pgt references contiguous guest memory block, not a pgtable */ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, @@ -1034,6 +1324,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, rfte.val = ptr; goto shadow_r2t; } + *pgt = ptr + vaddr.rfx * 8; rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); if (rc) return rc; @@ -1050,7 +1341,8 @@ shadow_r2t: rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); if (rc) return rc; - } /* fallthrough */ + } + fallthrough; case ASCE_TYPE_REGION2: { union region2_table_entry rste; @@ -1059,6 +1351,7 @@ shadow_r2t: rste.val = ptr; goto shadow_r3t; } + *pgt = ptr + vaddr.rsx * 8; rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); if (rc) return rc; @@ -1076,7 +1369,8 @@ shadow_r3t: rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); if (rc) return rc; - } /* fallthrough */ + } + fallthrough; case ASCE_TYPE_REGION3: { union region3_table_entry rtte; @@ -1085,6 +1379,7 @@ shadow_r3t: rtte.val = ptr; goto shadow_sgt; } + *pgt = ptr + vaddr.rtx * 8; rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); if (rc) return rc; @@ -1111,7 +1406,8 @@ shadow_sgt: rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); if (rc) return rc; - } /* fallthrough */ + } + fallthrough; case ASCE_TYPE_SEGMENT: { union segment_table_entry ste; @@ -1120,6 +1416,7 @@ shadow_sgt: ste.val = ptr; goto shadow_pgt; } + *pgt = ptr + vaddr.sx * 8; rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); if (rc) return rc; @@ -1154,6 +1451,8 @@ shadow_pgt: * @vcpu: virtual cpu * @sg: pointer to the shadow guest address space structure * @saddr: faulting address in the shadow gmap + * @datptr: will contain the address of the faulting DAT table entry, or of + * the valid leaf, plus some flags * * Returns: - 0 if the shadow fault was successfully resolved * - > 0 (pgm exception code) on exceptions while faulting @@ -1162,21 +1461,21 @@ shadow_pgt: * - -ENOMEM if out of memory */ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, - unsigned long saddr) + unsigned long saddr, unsigned long *datptr) { union vaddress vaddr; union page_table_entry pte; - unsigned long pgt; + unsigned long pgt = 0; int dat_protection, fake; int rc; - down_read(&sg->mm->mmap_sem); + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent * tables/pointers we read stay valid - unshadowing is however * always possible - only guest_table_lock protects us. */ - ipte_lock(vcpu); + ipte_lock(vcpu->kvm); rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); if (rc) @@ -1188,8 +1487,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, pte.val = pgt + vaddr.px * PAGE_SIZE; goto shadow_page; } - if (!rc) - rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); + + switch (rc) { + case PGM_SEGMENT_TRANSLATION: + case PGM_REGION_THIRD_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_FIRST_TRANS: + pgt |= PEI_NOT_PTE; + break; + case 0: + pgt += vaddr.px * 8; + rc = gmap_read_table(sg->parent, pgt, &pte.val); + } + if (datptr) + *datptr = pgt | dat_protection * PEI_DAT_PROT; if (!rc && pte.i) rc = PGM_PAGE_TRANSLATION; if (!rc && pte.z) @@ -1198,7 +1509,7 @@ shadow_page: pte.p |= dat_protection; if (!rc) rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); - ipte_unlock(vcpu); - up_read(&sg->mm->mmap_sem); + ipte_unlock(vcpu->kvm); + mmap_read_unlock(sg->mm); return rc; } |