aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorOliver Upton <oliver.upton@linux.dev>2024-11-11 18:36:12 +0000
committerOliver Upton <oliver.upton@linux.dev>2024-11-11 18:36:12 +0000
commit2865463442f8be5943686c767eb45089f29fb157 (patch)
tree92279ee16b1029eb7b075fd002e028924f253550
parentLinux 6.12-rc5 (diff)
parentKVM: arm64: Handle WXN attribute (diff)
downloadwireguard-linux-2865463442f8be5943686c767eb45089f29fb157.tar.xz
wireguard-linux-2865463442f8be5943686c767eb45089f29fb157.zip
Merge branch kvm-arm64/nv-s1pie-s1poe into kvmarm/next
* kvm-arm64/nv-s1pie-s1poe: (36 commits) : NV support for S1PIE/S1POE, courtesy of Marc Zyngier : : Complete support for S1PIE/S1POE at vEL2, including: : : - Save/restore of the vEL2 sysreg context : : - Use the S1PIE/S1POE context for fast-path AT emulation : : - Enlightening the software walker to the behavior of S1PIE/S1POE : : - Like any other good NV series, some trap routing descriptions KVM: arm64: Handle WXN attribute KVM: arm64: Handle stage-1 permission overlays KVM: arm64: Make PAN conditions part of the S1 walk context KVM: arm64: Disable hierarchical permissions when POE is enabled KVM: arm64: Add POE save/restore for AT emulation fast-path KVM: arm64: Add save/restore support for POR_EL2 KVM: arm64: Add basic support for POR_EL2 KVM: arm64: Add kvm_has_s1poe() helper KVM: arm64: Subject S1PIE/S1POE registers to HCR_EL2.{TVM,TRVM} KVM: arm64: Drop bogus CPTR_EL2.E0POE trap routing arm64: Add encoding for POR_EL2 KVM: arm64: Rely on visibility to let PIR*_ELx/TCR2_ELx UNDEF KVM: arm64: Hide S1PIE registers from userspace when disabled for guests KVM: arm64: Hide TCR2_EL1 from userspace when disabled for guests KVM: arm64: Define helper for EL2 registers with custom visibility KVM: arm64: Add a composite EL2 visibility helper KVM: arm64: Implement AT S1PIE support KVM: arm64: Disable hierarchical permissions when S1PIE is enabled KVM: arm64: Split S1 permission evaluation into direct and hierarchical parts KVM: arm64: Add AT fast-path support for S1PIE ... Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
-rw-r--r--arch/arm64/include/asm/kvm_host.h40
-rw-r--r--arch/arm64/include/asm/vncr_mapping.h1
-rw-r--r--arch/arm64/kvm/at.c470
-rw-r--r--arch/arm64/kvm/emulate-nested.c12
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h11
-rw-r--r--arch/arm64/kvm/hyp/nvhe/sysreg-sr.c2
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c160
-rw-r--r--arch/arm64/kvm/nested.c40
-rw-r--r--arch/arm64/kvm/sys_regs.c138
-rw-r--r--arch/arm64/tools/sysreg12
-rw-r--r--include/kvm/arm_arch_timer.h3
11 files changed, 771 insertions, 118 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bf64fed9820e..28ea7e72477c 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -374,7 +374,7 @@ struct kvm_arch {
u64 ctr_el0;
- /* Masks for VNCR-baked sysregs */
+ /* Masks for VNCR-backed and general EL2 sysregs */
struct kvm_sysreg_masks *sysreg_masks;
/*
@@ -408,6 +408,9 @@ struct kvm_vcpu_fault_info {
r = __VNCR_START__ + ((VNCR_ ## r) / 8), \
__after_##r = __MAX__(__before_##r - 1, r)
+#define MARKER(m) \
+ m, __after_##m = m - 1
+
enum vcpu_sysreg {
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
@@ -475,6 +478,9 @@ enum vcpu_sysreg {
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
TCR_EL2, /* Translation Control Register (EL2) */
+ PIRE0_EL2, /* Permission Indirection Register 0 (EL2) */
+ PIR_EL2, /* Permission Indirection Register 1 (EL2) */
+ POR_EL2, /* Permission Overlay Register 2 (EL2) */
SPSR_EL2, /* EL2 saved program status register */
ELR_EL2, /* EL2 exception link register */
AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
@@ -494,7 +500,12 @@ enum vcpu_sysreg {
CNTHV_CTL_EL2,
CNTHV_CVAL_EL2,
- __VNCR_START__, /* Any VNCR-capable reg goes after this point */
+ /* Anything from this can be RES0/RES1 sanitised */
+ MARKER(__SANITISED_REG_START__),
+ TCR2_EL2, /* Extended Translation Control Register (EL2) */
+
+ /* Any VNCR-capable reg goes after this point */
+ MARKER(__VNCR_START__),
VNCR(SCTLR_EL1),/* System Control Register */
VNCR(ACTLR_EL1),/* Auxiliary Control Register */
@@ -554,7 +565,7 @@ struct kvm_sysreg_masks {
struct {
u64 res0;
u64 res1;
- } mask[NR_SYS_REGS - __VNCR_START__];
+ } mask[NR_SYS_REGS - __SANITISED_REG_START__];
};
struct kvm_cpu_context {
@@ -1002,13 +1013,13 @@ static inline u64 *___ctxt_sys_reg(const struct kvm_cpu_context *ctxt, int r)
#define ctxt_sys_reg(c,r) (*__ctxt_sys_reg(c,r))
-u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *, enum vcpu_sysreg);
+u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *, enum vcpu_sysreg, u64);
#define __vcpu_sys_reg(v,r) \
(*({ \
const struct kvm_cpu_context *ctxt = &(v)->arch.ctxt; \
u64 *__r = __ctxt_sys_reg(ctxt, (r)); \
- if (vcpu_has_nv((v)) && (r) >= __VNCR_START__) \
- *__r = kvm_vcpu_sanitise_vncr_reg((v), (r)); \
+ if (vcpu_has_nv((v)) && (r) >= __SANITISED_REG_START__) \
+ *__r = kvm_vcpu_apply_reg_masks((v), (r), *__r);\
__r; \
}))
@@ -1037,6 +1048,10 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break;
case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break;
+ case TCR2_EL1: *val = read_sysreg_s(SYS_TCR2_EL12); break;
+ case PIR_EL1: *val = read_sysreg_s(SYS_PIR_EL12); break;
+ case PIRE0_EL1: *val = read_sysreg_s(SYS_PIRE0_EL12); break;
+ case POR_EL1: *val = read_sysreg_s(SYS_POR_EL12); break;
case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break;
case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break;
case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break;
@@ -1083,6 +1098,10 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break;
case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break;
+ case TCR2_EL1: write_sysreg_s(val, SYS_TCR2_EL12); break;
+ case PIR_EL1: write_sysreg_s(val, SYS_PIR_EL12); break;
+ case PIRE0_EL1: write_sysreg_s(val, SYS_PIRE0_EL12); break;
+ case POR_EL1: write_sysreg_s(val, SYS_POR_EL12); break;
case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break;
case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break;
case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break;
@@ -1503,4 +1522,13 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
(system_supports_fpmr() && \
kvm_has_feat((k), ID_AA64PFR2_EL1, FPMR, IMP))
+#define kvm_has_tcr2(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, TCRX, IMP))
+
+#define kvm_has_s1pie(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1PIE, IMP))
+
+#define kvm_has_s1poe(k) \
+ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/vncr_mapping.h b/arch/arm64/include/asm/vncr_mapping.h
index 06f8ec0906a6..4f9bbd4d6c26 100644
--- a/arch/arm64/include/asm/vncr_mapping.h
+++ b/arch/arm64/include/asm/vncr_mapping.h
@@ -50,7 +50,6 @@
#define VNCR_VBAR_EL1 0x250
#define VNCR_TCR2_EL1 0x270
#define VNCR_PIRE0_EL1 0x290
-#define VNCR_PIRE0_EL2 0x298
#define VNCR_PIR_EL1 0x2A0
#define VNCR_POR_EL1 0x2A8
#define VNCR_ICH_LR0_EL2 0x400
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 39f0e87a340e..8c5d7990e5b3 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -24,6 +24,9 @@ struct s1_walk_info {
unsigned int txsz;
int sl;
bool hpd;
+ bool e0poe;
+ bool poe;
+ bool pan;
bool be;
bool s2;
};
@@ -37,6 +40,16 @@ struct s1_walk_result {
u8 APTable;
bool UXNTable;
bool PXNTable;
+ bool uwxn;
+ bool uov;
+ bool ur;
+ bool uw;
+ bool ux;
+ bool pwxn;
+ bool pov;
+ bool pr;
+ bool pw;
+ bool px;
};
struct {
u8 fst;
@@ -87,6 +100,51 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o
}
}
+static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
+{
+ if (!kvm_has_s1pie(vcpu->kvm))
+ return false;
+
+ switch (regime) {
+ case TR_EL2:
+ case TR_EL20:
+ return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
+ case TR_EL10:
+ return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
+ (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1x_PIE);
+ default:
+ BUG();
+ }
+}
+
+static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
+{
+ u64 val;
+
+ if (!kvm_has_s1poe(vcpu->kvm)) {
+ wi->poe = wi->e0poe = false;
+ return;
+ }
+
+ switch (wi->regime) {
+ case TR_EL2:
+ case TR_EL20:
+ val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
+ wi->poe = val & TCR2_EL2_POE;
+ wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
+ break;
+ case TR_EL10:
+ if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) {
+ wi->poe = wi->e0poe = false;
+ return;
+ }
+
+ val = __vcpu_sys_reg(vcpu, TCR2_EL1);
+ wi->poe = val & TCR2_EL1x_POE;
+ wi->e0poe = val & TCR2_EL1x_E0POE;
+ }
+}
+
static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
@@ -98,6 +156,8 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
wi->regime = compute_translation_regime(vcpu, op);
as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W);
+ wi->pan = (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) &&
+ (*vcpu_cpsr(vcpu) & PSR_PAN_BIT);
va55 = va & BIT(55);
@@ -180,6 +240,14 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi,
(va55 ?
FIELD_GET(TCR_HPD1, tcr) :
FIELD_GET(TCR_HPD0, tcr)));
+ /* R_JHSVW */
+ wi->hpd |= s1pie_enabled(vcpu, wi->regime);
+
+ /* Do we have POE? */
+ compute_s1poe(vcpu, wi);
+
+ /* R_BVXDG */
+ wi->hpd |= (wi->poe || wi->e0poe);
/* Someone was silly enough to encode TG0/TG1 differently */
if (va55) {
@@ -412,6 +480,11 @@ struct mmu_config {
u64 ttbr1;
u64 tcr;
u64 mair;
+ u64 tcr2;
+ u64 pir;
+ u64 pire0;
+ u64 por_el0;
+ u64 por_el1;
u64 sctlr;
u64 vttbr;
u64 vtcr;
@@ -424,6 +497,17 @@ static void __mmu_config_save(struct mmu_config *config)
config->ttbr1 = read_sysreg_el1(SYS_TTBR1);
config->tcr = read_sysreg_el1(SYS_TCR);
config->mair = read_sysreg_el1(SYS_MAIR);
+ if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
+ config->tcr2 = read_sysreg_el1(SYS_TCR2);
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ config->pir = read_sysreg_el1(SYS_PIR);
+ config->pire0 = read_sysreg_el1(SYS_PIRE0);
+ }
+ if (system_supports_poe()) {
+ config->por_el1 = read_sysreg_el1(SYS_POR);
+ config->por_el0 = read_sysreg_s(SYS_POR_EL0);
+ }
+ }
config->sctlr = read_sysreg_el1(SYS_SCTLR);
config->vttbr = read_sysreg(vttbr_el2);
config->vtcr = read_sysreg(vtcr_el2);
@@ -444,6 +528,17 @@ static void __mmu_config_restore(struct mmu_config *config)
write_sysreg_el1(config->ttbr1, SYS_TTBR1);
write_sysreg_el1(config->tcr, SYS_TCR);
write_sysreg_el1(config->mair, SYS_MAIR);
+ if (cpus_have_final_cap(ARM64_HAS_TCR2)) {
+ write_sysreg_el1(config->tcr2, SYS_TCR2);
+ if (cpus_have_final_cap(ARM64_HAS_S1PIE)) {
+ write_sysreg_el1(config->pir, SYS_PIR);
+ write_sysreg_el1(config->pire0, SYS_PIRE0);
+ }
+ if (system_supports_poe()) {
+ write_sysreg_el1(config->por_el1, SYS_POR);
+ write_sysreg_s(config->por_el0, SYS_POR_EL0);
+ }
+ }
write_sysreg_el1(config->sctlr, SYS_SCTLR);
write_sysreg(config->vttbr, vttbr_el2);
write_sysreg(config->vtcr, vtcr_el2);
@@ -739,6 +834,9 @@ static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3))
return false;
+ if (s1pie_enabled(vcpu, regime))
+ return true;
+
if (regime == TR_EL10)
sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1);
else
@@ -747,111 +845,343 @@ static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
return sctlr & SCTLR_EL1_EPAN;
}
-static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
{
- bool perm_fail, ur, uw, ux, pr, pw, px;
- struct s1_walk_result wr = {};
- struct s1_walk_info wi = {};
- int ret, idx;
+ bool wxn;
- ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
- if (ret)
- goto compute_par;
-
- if (wr.level == S1_MMU_DISABLED)
- goto compute_par;
-
- idx = srcu_read_lock(&vcpu->kvm->srcu);
-
- ret = walk_s1(vcpu, &wi, &wr, vaddr);
-
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
- if (ret)
- goto compute_par;
-
- /* FIXME: revisit when adding indirect permission support */
- /* AArch64.S1DirectBasePermissions() */
- if (wi.regime != TR_EL2) {
- switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) {
+ /* Non-hierarchical part of AArch64.S1DirectBasePermissions() */
+ if (wi->regime != TR_EL2) {
+ switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr->desc)) {
case 0b00:
- pr = pw = true;
- ur = uw = false;
+ wr->pr = wr->pw = true;
+ wr->ur = wr->uw = false;
break;
case 0b01:
- pr = pw = ur = uw = true;
+ wr->pr = wr->pw = wr->ur = wr->uw = true;
break;
case 0b10:
- pr = true;
- pw = ur = uw = false;
+ wr->pr = true;
+ wr->pw = wr->ur = wr->uw = false;
break;
case 0b11:
- pr = ur = true;
- pw = uw = false;
+ wr->pr = wr->ur = true;
+ wr->pw = wr->uw = false;
break;
}
- switch (wr.APTable) {
+ /* We don't use px for anything yet, but hey... */
+ wr->px = !((wr->desc & PTE_PXN) || wr->uw);
+ wr->ux = !(wr->desc & PTE_UXN);
+ } else {
+ wr->ur = wr->uw = wr->ux = false;
+
+ if (!(wr->desc & PTE_RDONLY)) {
+ wr->pr = wr->pw = true;
+ } else {
+ wr->pr = true;
+ wr->pw = false;
+ }
+
+ /* XN maps to UXN */
+ wr->px = !(wr->desc & PTE_UXN);
+ }
+
+ switch (wi->regime) {
+ case TR_EL2:
+ case TR_EL20:
+ wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
+ break;
+ case TR_EL10:
+ wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
+ break;
+ }
+
+ wr->pwxn = wr->uwxn = wxn;
+ wr->pov = wi->poe;
+ wr->uov = wi->e0poe;
+}
+
+static void compute_s1_hierarchical_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ /* Hierarchical part of AArch64.S1DirectBasePermissions() */
+ if (wi->regime != TR_EL2) {
+ switch (wr->APTable) {
case 0b00:
break;
case 0b01:
- ur = uw = false;
+ wr->ur = wr->uw = false;
break;
case 0b10:
- pw = uw = false;
+ wr->pw = wr->uw = false;
break;
case 0b11:
- pw = ur = uw = false;
+ wr->pw = wr->ur = wr->uw = false;
break;
}
- /* We don't use px for anything yet, but hey... */
- px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw);
- ux = !((wr.desc & PTE_UXN) || wr.UXNTable);
+ wr->px &= !wr->PXNTable;
+ wr->ux &= !wr->UXNTable;
+ } else {
+ if (wr->APTable & BIT(1))
+ wr->pw = false;
+
+ /* XN maps to UXN */
+ wr->px &= !wr->UXNTable;
+ }
+}
- if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) {
- bool pan;
+#define perm_idx(v, r, i) ((vcpu_read_sys_reg((v), (r)) >> ((i) * 4)) & 0xf)
+
+#define set_priv_perms(wr, r, w, x) \
+ do { \
+ (wr)->pr = (r); \
+ (wr)->pw = (w); \
+ (wr)->px = (x); \
+ } while (0)
+
+#define set_unpriv_perms(wr, r, w, x) \
+ do { \
+ (wr)->ur = (r); \
+ (wr)->uw = (w); \
+ (wr)->ux = (x); \
+ } while (0)
+
+#define set_priv_wxn(wr, v) \
+ do { \
+ (wr)->pwxn = (v); \
+ } while (0)
+
+#define set_unpriv_wxn(wr, v) \
+ do { \
+ (wr)->uwxn = (v); \
+ } while (0)
+
+/* Similar to AArch64.S1IndirectBasePermissions(), without GCS */
+#define set_perms(w, wr, ip) \
+ do { \
+ /* R_LLZDZ */ \
+ switch ((ip)) { \
+ case 0b0000: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b0001: \
+ set_ ## w ## _perms((wr), true , false, false); \
+ break; \
+ case 0b0010: \
+ set_ ## w ## _perms((wr), false, false, true ); \
+ break; \
+ case 0b0011: \
+ set_ ## w ## _perms((wr), true , false, true ); \
+ break; \
+ case 0b0100: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b0101: \
+ set_ ## w ## _perms((wr), true , true , false); \
+ break; \
+ case 0b0110: \
+ set_ ## w ## _perms((wr), true , true , true ); \
+ break; \
+ case 0b0111: \
+ set_ ## w ## _perms((wr), true , true , true ); \
+ break; \
+ case 0b1000: \
+ set_ ## w ## _perms((wr), true , false, false); \
+ break; \
+ case 0b1001: \
+ set_ ## w ## _perms((wr), true , false, false); \
+ break; \
+ case 0b1010: \
+ set_ ## w ## _perms((wr), true , false, true ); \
+ break; \
+ case 0b1011: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b1100: \
+ set_ ## w ## _perms((wr), true , true , false); \
+ break; \
+ case 0b1101: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ case 0b1110: \
+ set_ ## w ## _perms((wr), true , true , true ); \
+ break; \
+ case 0b1111: \
+ set_ ## w ## _perms((wr), false, false, false); \
+ break; \
+ } \
+ \
+ /* R_HJYGR */ \
+ set_ ## w ## _wxn((wr), ((ip) == 0b0110)); \
+ \
+ } while (0)
+
+static void compute_s1_indirect_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ u8 up, pp, idx;
- pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT;
- pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux);
- pw &= !pan;
- pr &= !pan;
- }
- } else {
- ur = uw = ux = false;
+ idx = pte_pi_index(wr->desc);
- if (!(wr.desc & PTE_RDONLY)) {
- pr = pw = true;
- } else {
- pr = true;
- pw = false;
- }
+ switch (wi->regime) {
+ case TR_EL10:
+ pp = perm_idx(vcpu, PIR_EL1, idx);
+ up = perm_idx(vcpu, PIRE0_EL1, idx);
+ break;
+ case TR_EL20:
+ pp = perm_idx(vcpu, PIR_EL2, idx);
+ up = perm_idx(vcpu, PIRE0_EL2, idx);
+ break;
+ case TR_EL2:
+ pp = perm_idx(vcpu, PIR_EL2, idx);
+ up = 0;
+ break;
+ }
- if (wr.APTable & BIT(1))
- pw = false;
+ set_perms(priv, wr, pp);
- /* XN maps to UXN */
- px = !((wr.desc & PTE_UXN) || wr.UXNTable);
+ if (wi->regime != TR_EL2)
+ set_perms(unpriv, wr, up);
+ else
+ set_unpriv_perms(wr, false, false, false);
+
+ wr->pov = wi->poe && !(pp & BIT(3));
+ wr->uov = wi->e0poe && !(up & BIT(3));
+
+ /* R_VFPJF */
+ if (wr->px && wr->uw) {
+ set_priv_perms(wr, false, false, false);
+ set_unpriv_perms(wr, false, false, false);
+ }
+}
+
+static void compute_s1_overlay_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ u8 idx, pov_perms, uov_perms;
+
+ idx = FIELD_GET(PTE_PO_IDX_MASK, wr->desc);
+
+ switch (wi->regime) {
+ case TR_EL10:
+ pov_perms = perm_idx(vcpu, POR_EL1, idx);
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL20:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ uov_perms = perm_idx(vcpu, POR_EL0, idx);
+ break;
+ case TR_EL2:
+ pov_perms = perm_idx(vcpu, POR_EL2, idx);
+ uov_perms = 0;
+ break;
+ }
+
+ if (pov_perms & ~POE_RXW)
+ pov_perms = POE_NONE;
+
+ if (wi->poe && wr->pov) {
+ wr->pr &= pov_perms & POE_R;
+ wr->px &= pov_perms & POE_X;
+ wr->pw &= pov_perms & POE_W;
+ }
+
+ if (uov_perms & ~POE_RXW)
+ uov_perms = POE_NONE;
+
+ if (wi->e0poe && wr->uov) {
+ wr->ur &= uov_perms & POE_R;
+ wr->ux &= uov_perms & POE_X;
+ wr->uw &= uov_perms & POE_W;
}
+}
+
+static void compute_s1_permissions(struct kvm_vcpu *vcpu,
+ struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
+{
+ bool pan;
+
+ if (!s1pie_enabled(vcpu, wi->regime))
+ compute_s1_direct_permissions(vcpu, wi, wr);
+ else
+ compute_s1_indirect_permissions(vcpu, wi, wr);
+
+ if (!wi->hpd)
+ compute_s1_hierarchical_permissions(vcpu, wi, wr);
+
+ if (wi->poe || wi->e0poe)
+ compute_s1_overlay_permissions(vcpu, wi, wr);
+
+ /* R_QXXPC */
+ if (wr->pwxn) {
+ if (!wr->pov && wr->pw)
+ wr->px = false;
+ if (wr->pov && wr->px)
+ wr->pw = false;
+ }
+
+ /* R_NPBXC */
+ if (wr->uwxn) {
+ if (!wr->uov && wr->uw)
+ wr->ux = false;
+ if (wr->uov && wr->ux)
+ wr->uw = false;
+ }
+
+ pan = wi->pan && (wr->ur || wr->uw ||
+ (pan3_enabled(vcpu, wi->regime) && wr->ux));
+ wr->pw &= !pan;
+ wr->pr &= !pan;
+}
- perm_fail = false;
+static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+ struct s1_walk_result wr = {};
+ struct s1_walk_info wi = {};
+ bool perm_fail = false;
+ int ret, idx;
+
+ ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr);
+ if (ret)
+ goto compute_par;
+
+ if (wr.level == S1_MMU_DISABLED)
+ goto compute_par;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = walk_s1(vcpu, &wi, &wr, vaddr);
+
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ if (ret)
+ goto compute_par;
+
+ compute_s1_permissions(vcpu, &wi, &wr);
switch (op) {
case OP_AT_S1E1RP:
case OP_AT_S1E1R:
case OP_AT_S1E2R:
- perm_fail = !pr;
+ perm_fail = !wr.pr;
break;
case OP_AT_S1E1WP:
case OP_AT_S1E1W:
case OP_AT_S1E2W:
- perm_fail = !pw;
+ perm_fail = !wr.pw;
break;
case OP_AT_S1E0R:
- perm_fail = !ur;
+ perm_fail = !wr.ur;
break;
case OP_AT_S1E0W:
- perm_fail = !uw;
+ perm_fail = !wr.uw;
break;
case OP_AT_S1E1A:
case OP_AT_S1E2A:
@@ -914,6 +1244,17 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR);
write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR);
+ if (kvm_has_tcr2(vcpu->kvm)) {
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR2_EL1), SYS_TCR2);
+ if (kvm_has_s1pie(vcpu->kvm)) {
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIR_EL1), SYS_PIR);
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, PIRE0_EL1), SYS_PIRE0);
+ }
+ if (kvm_has_s1poe(vcpu->kvm)) {
+ write_sysreg_el1(vcpu_read_sys_reg(vcpu, POR_EL1), SYS_POR);
+ write_sysreg_s(vcpu_read_sys_reg(vcpu, POR_EL0), SYS_POR_EL0);
+ }
+ }
write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR);
__load_stage2(mmu, mmu->arch);
@@ -992,12 +1333,9 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
* switching context behind everybody's back, disable interrupts...
*/
scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) {
- struct kvm_s2_mmu *mmu;
u64 val, hcr;
bool fail;
- mmu = &vcpu->kvm->arch.mmu;
-
val = hcr = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
val |= HCR_VM;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 05b6435d02a9..0ab090553354 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -79,7 +79,6 @@ enum cgt_group_id {
CGT_MDCR_E2TB,
CGT_MDCR_TDCC,
- CGT_CPACR_E0POE,
CGT_CPTR_TAM,
CGT_CPTR_TCPAC,
@@ -362,12 +361,6 @@ static const struct trap_bits coarse_trap_bits[] = {
.mask = MDCR_EL2_TDCC,
.behaviour = BEHAVE_FORWARD_ANY,
},
- [CGT_CPACR_E0POE] = {
- .index = CPTR_EL2,
- .value = CPACR_ELx_E0POE,
- .mask = CPACR_ELx_E0POE,
- .behaviour = BEHAVE_FORWARD_ANY,
- },
[CGT_CPTR_TAM] = {
.index = CPTR_EL2,
.value = CPTR_EL2_TAM,
@@ -711,6 +704,10 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_MAIR_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_AMAIR_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_CONTEXTIDR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_PIR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_PIRE0_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_POR_EL0, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_POR_EL1, CGT_HCR_TVM_TRVM),
SR_TRAP(SYS_TCR2_EL1, CGT_HCR_TVM_TRVM_HCRX_TCR2En),
SR_TRAP(SYS_DC_ZVA, CGT_HCR_TDZ),
SR_TRAP(SYS_DC_GVA, CGT_HCR_TDZ),
@@ -1141,7 +1138,6 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
SR_TRAP(SYS_AMEVTYPER1_EL0(13), CGT_CPTR_TAM),
SR_TRAP(SYS_AMEVTYPER1_EL0(14), CGT_CPTR_TAM),
SR_TRAP(SYS_AMEVTYPER1_EL0(15), CGT_CPTR_TAM),
- SR_TRAP(SYS_POR_EL0, CGT_CPACR_E0POE),
/* op0=2, op1=1, and CRn<0b1000 */
SR_RANGE_TRAP(sys_reg(2, 1, 0, 0, 0),
sys_reg(2, 1, 7, 15, 7), CGT_CPTR_TTA),
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index 1579a3c08a36..a651c43ad679 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -58,7 +58,7 @@ static inline bool ctxt_has_s1pie(struct kvm_cpu_context *ctxt)
return false;
vcpu = ctxt_to_vcpu(ctxt);
- return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1PIE, IMP);
+ return kvm_has_s1pie(kern_hyp_va(vcpu->kvm));
}
static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
@@ -69,7 +69,7 @@ static inline bool ctxt_has_tcrx(struct kvm_cpu_context *ctxt)
return false;
vcpu = ctxt_to_vcpu(ctxt);
- return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, TCRX, IMP);
+ return kvm_has_tcr2(kern_hyp_va(vcpu->kvm));
}
static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
@@ -80,7 +80,7 @@ static inline bool ctxt_has_s1poe(struct kvm_cpu_context *ctxt)
return false;
vcpu = ctxt_to_vcpu(ctxt);
- return kvm_has_feat(kern_hyp_va(vcpu->kvm), ID_AA64MMFR3_EL1, S1POE, IMP);
+ return kvm_has_s1poe(kern_hyp_va(vcpu->kvm));
}
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
@@ -152,9 +152,10 @@ static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0);
}
-static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
+static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
+ u64 mpidr)
{
- write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2);
+ write_sysreg(mpidr, vmpidr_el2);
if (has_vhe() ||
!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
index 29305022bc04..dba101565de3 100644
--- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c
@@ -28,7 +28,7 @@ void __sysreg_save_state_nvhe(struct kvm_cpu_context *ctxt)
void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt)
{
- __sysreg_restore_el1_state(ctxt);
+ __sysreg_restore_el1_state(ctxt, ctxt_sys_reg(ctxt, MPIDR_EL1));
__sysreg_restore_common_state(ctxt);
__sysreg_restore_user_state(ctxt);
__sysreg_restore_el2_return_state(ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index e12bd7d6d2dc..5f78a39053a7 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -15,6 +15,131 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_nested.h>
+static void __sysreg_save_vel2_state(struct kvm_vcpu *vcpu)
+{
+ /* These registers are common with EL1 */
+ __vcpu_sys_reg(vcpu, PAR_EL1) = read_sysreg(par_el1);
+ __vcpu_sys_reg(vcpu, TPIDR_EL1) = read_sysreg(tpidr_el1);
+
+ __vcpu_sys_reg(vcpu, ESR_EL2) = read_sysreg_el1(SYS_ESR);
+ __vcpu_sys_reg(vcpu, AFSR0_EL2) = read_sysreg_el1(SYS_AFSR0);
+ __vcpu_sys_reg(vcpu, AFSR1_EL2) = read_sysreg_el1(SYS_AFSR1);
+ __vcpu_sys_reg(vcpu, FAR_EL2) = read_sysreg_el1(SYS_FAR);
+ __vcpu_sys_reg(vcpu, MAIR_EL2) = read_sysreg_el1(SYS_MAIR);
+ __vcpu_sys_reg(vcpu, VBAR_EL2) = read_sysreg_el1(SYS_VBAR);
+ __vcpu_sys_reg(vcpu, CONTEXTIDR_EL2) = read_sysreg_el1(SYS_CONTEXTIDR);
+ __vcpu_sys_reg(vcpu, AMAIR_EL2) = read_sysreg_el1(SYS_AMAIR);
+
+ /*
+ * In VHE mode those registers are compatible between EL1 and EL2,
+ * and the guest uses the _EL1 versions on the CPU naturally.
+ * So we save them into their _EL2 versions here.
+ * For nVHE mode we trap accesses to those registers, so our
+ * _EL2 copy in sys_regs[] is always up-to-date and we don't need
+ * to save anything here.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ u64 val;
+
+ /*
+ * We don't save CPTR_EL2, as accesses to CPACR_EL1
+ * are always trapped, ensuring that the in-memory
+ * copy is always up-to-date. A small blessing...
+ */
+ __vcpu_sys_reg(vcpu, SCTLR_EL2) = read_sysreg_el1(SYS_SCTLR);
+ __vcpu_sys_reg(vcpu, TTBR0_EL2) = read_sysreg_el1(SYS_TTBR0);
+ __vcpu_sys_reg(vcpu, TTBR1_EL2) = read_sysreg_el1(SYS_TTBR1);
+ __vcpu_sys_reg(vcpu, TCR_EL2) = read_sysreg_el1(SYS_TCR);
+
+ if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
+ __vcpu_sys_reg(vcpu, TCR2_EL2) = read_sysreg_el1(SYS_TCR2);
+
+ if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
+ __vcpu_sys_reg(vcpu, PIRE0_EL2) = read_sysreg_el1(SYS_PIRE0);
+ __vcpu_sys_reg(vcpu, PIR_EL2) = read_sysreg_el1(SYS_PIR);
+ }
+
+ if (ctxt_has_s1poe(&vcpu->arch.ctxt))
+ __vcpu_sys_reg(vcpu, POR_EL2) = read_sysreg_el1(SYS_POR);
+ }
+
+ /*
+ * The EL1 view of CNTKCTL_EL1 has a bunch of RES0 bits where
+ * the interesting CNTHCTL_EL2 bits live. So preserve these
+ * bits when reading back the guest-visible value.
+ */
+ val = read_sysreg_el1(SYS_CNTKCTL);
+ val &= CNTKCTL_VALID_BITS;
+ __vcpu_sys_reg(vcpu, CNTHCTL_EL2) &= ~CNTKCTL_VALID_BITS;
+ __vcpu_sys_reg(vcpu, CNTHCTL_EL2) |= val;
+ }
+
+ __vcpu_sys_reg(vcpu, SP_EL2) = read_sysreg(sp_el1);
+ __vcpu_sys_reg(vcpu, ELR_EL2) = read_sysreg_el1(SYS_ELR);
+ __vcpu_sys_reg(vcpu, SPSR_EL2) = read_sysreg_el1(SYS_SPSR);
+}
+
+static void __sysreg_restore_vel2_state(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ /* These registers are common with EL1 */
+ write_sysreg(__vcpu_sys_reg(vcpu, PAR_EL1), par_el1);
+ write_sysreg(__vcpu_sys_reg(vcpu, TPIDR_EL1), tpidr_el1);
+
+ write_sysreg(__vcpu_sys_reg(vcpu, MPIDR_EL1), vmpidr_el2);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, MAIR_EL2), SYS_MAIR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, VBAR_EL2), SYS_VBAR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CONTEXTIDR_EL2), SYS_CONTEXTIDR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AMAIR_EL2), SYS_AMAIR);
+
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ /*
+ * In VHE mode those registers are compatible between
+ * EL1 and EL2.
+ */
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2), SYS_SCTLR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CPTR_EL2), SYS_CPACR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2), SYS_TTBR0);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TTBR1_EL2), SYS_TTBR1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR_EL2), SYS_TCR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, CNTHCTL_EL2), SYS_CNTKCTL);
+ } else {
+ /*
+ * CNTHCTL_EL2 only affects EL1 when running nVHE, so
+ * no need to restore it.
+ */
+ val = translate_sctlr_el2_to_sctlr_el1(__vcpu_sys_reg(vcpu, SCTLR_EL2));
+ write_sysreg_el1(val, SYS_SCTLR);
+ val = translate_cptr_el2_to_cpacr_el1(__vcpu_sys_reg(vcpu, CPTR_EL2));
+ write_sysreg_el1(val, SYS_CPACR);
+ val = translate_ttbr0_el2_to_ttbr0_el1(__vcpu_sys_reg(vcpu, TTBR0_EL2));
+ write_sysreg_el1(val, SYS_TTBR0);
+ val = translate_tcr_el2_to_tcr_el1(__vcpu_sys_reg(vcpu, TCR_EL2));
+ write_sysreg_el1(val, SYS_TCR);
+ }
+
+ if (ctxt_has_tcrx(&vcpu->arch.ctxt)) {
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, TCR2_EL2), SYS_TCR2);
+
+ if (ctxt_has_s1pie(&vcpu->arch.ctxt)) {
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, PIR_EL2), SYS_PIR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, PIRE0_EL2), SYS_PIRE0);
+ }
+
+ if (ctxt_has_s1poe(&vcpu->arch.ctxt))
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, POR_EL2), SYS_POR);
+ }
+
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, ESR_EL2), SYS_ESR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR0_EL2), SYS_AFSR0);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, AFSR1_EL2), SYS_AFSR1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, FAR_EL2), SYS_FAR);
+ write_sysreg(__vcpu_sys_reg(vcpu, SP_EL2), sp_el1);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, ELR_EL2), SYS_ELR);
+ write_sysreg_el1(__vcpu_sys_reg(vcpu, SPSR_EL2), SYS_SPSR);
+}
+
/*
* VHE: Host and guest must save mdscr_el1 and sp_el0 (and the PC and
* pstate, which are handled as part of the el2 return state) on every
@@ -66,6 +191,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
{
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
+ u64 mpidr;
host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
@@ -89,7 +215,29 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_user_state(guest_ctxt);
- __sysreg_restore_el1_state(guest_ctxt);
+
+ if (unlikely(__is_hyp_ctxt(guest_ctxt))) {
+ __sysreg_restore_vel2_state(vcpu);
+ } else {
+ if (vcpu_has_nv(vcpu)) {
+ /*
+ * Use the guest hypervisor's VPIDR_EL2 when in a
+ * nested state. The hardware value of MIDR_EL1 gets
+ * restored on put.
+ */
+ write_sysreg(ctxt_sys_reg(guest_ctxt, VPIDR_EL2), vpidr_el2);
+
+ /*
+ * As we're restoring a nested guest, set the value
+ * provided by the guest hypervisor.
+ */
+ mpidr = ctxt_sys_reg(guest_ctxt, VMPIDR_EL2);
+ } else {
+ mpidr = ctxt_sys_reg(guest_ctxt, MPIDR_EL1);
+ }
+
+ __sysreg_restore_el1_state(guest_ctxt, mpidr);
+ }
vcpu_set_flag(vcpu, SYSREGS_ON_CPU);
}
@@ -112,12 +260,20 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
host_ctxt = host_data_ptr(host_ctxt);
- __sysreg_save_el1_state(guest_ctxt);
+ if (unlikely(__is_hyp_ctxt(guest_ctxt)))
+ __sysreg_save_vel2_state(vcpu);
+ else
+ __sysreg_save_el1_state(guest_ctxt);
+
__sysreg_save_user_state(guest_ctxt);
__sysreg32_save_state(vcpu);
/* Restore host user state */
__sysreg_restore_user_state(host_ctxt);
+ /* If leaving a nesting guest, restore MIDR_EL1 default view */
+ if (vcpu_has_nv(vcpu))
+ write_sysreg(read_cpuid_id(), vpidr_el2);
+
vcpu_clear_flag(vcpu, SYSREGS_ON_CPU);
}
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index c4b17d90fc49..be88a36da071 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -933,15 +933,15 @@ static void limit_nv_id_regs(struct kvm *kvm)
kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val);
}
-u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
+u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu,
+ enum vcpu_sysreg sr, u64 v)
{
- u64 v = ctxt_sys_reg(&vcpu->arch.ctxt, sr);
struct kvm_sysreg_masks *masks;
masks = vcpu->kvm->arch.sysreg_masks;
if (masks) {
- sr -= __VNCR_START__;
+ sr -= __SANITISED_REG_START__;
v &= ~masks->mask[sr].res0;
v |= masks->mask[sr].res1;
@@ -952,7 +952,11 @@ u64 kvm_vcpu_sanitise_vncr_reg(const struct kvm_vcpu *vcpu, enum vcpu_sysreg sr)
static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1)
{
- int i = sr - __VNCR_START__;
+ int i = sr - __SANITISED_REG_START__;
+
+ BUILD_BUG_ON(!__builtin_constant_p(sr));
+ BUILD_BUG_ON(sr < __SANITISED_REG_START__);
+ BUILD_BUG_ON(sr >= NR_SYS_REGS);
kvm->arch.sysreg_masks->mask[i].res0 = res0;
kvm->arch.sysreg_masks->mask[i].res1 = res1;
@@ -1050,7 +1054,7 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= HCRX_EL2_PTTWI;
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, SCTLRX, IMP))
res0 |= HCRX_EL2_SCTLR2En;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+ if (!kvm_has_tcr2(kvm))
res0 |= HCRX_EL2_TCR2En;
if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
res0 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
@@ -1101,9 +1105,9 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= (HFGxTR_EL2_nSMPRI_EL1 | HFGxTR_EL2_nTPIDR2_EL0);
if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
res0 |= HFGxTR_EL2_nRCWMASK_EL1;
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+ if (!kvm_has_s1pie(kvm))
res0 |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ if (!kvm_has_s1poe(kvm))
res0 |= (HFGxTR_EL2_nPOR_EL0 | HFGxTR_EL2_nPOR_EL1);
if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S2POE, IMP))
res0 |= HFGxTR_EL2_nS2POR_EL1;
@@ -1200,6 +1204,28 @@ int kvm_init_nv_sysregs(struct kvm *kvm)
res0 |= ~(res0 | res1);
set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1);
+ /* TCR2_EL2 */
+ res0 = TCR2_EL2_RES0;
+ res1 = TCR2_EL2_RES1;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, D128, IMP))
+ res0 |= (TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1 | TCR2_EL2_D128);
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, MEC, IMP))
+ res0 |= TCR2_EL2_AMEC1 | TCR2_EL2_AMEC0;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, HAFDBS, HAFT))
+ res0 |= TCR2_EL2_HAFT;
+ if (!kvm_has_feat(kvm, ID_AA64PFR1_EL1, THE, IMP))
+ res0 |= TCR2_EL2_PTTWI | TCR2_EL2_PnCH;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, AIE, IMP))
+ res0 |= TCR2_EL2_AIE;
+ if (!kvm_has_s1poe(kvm))
+ res0 |= TCR2_EL2_POE | TCR2_EL2_E0POE;
+ if (!kvm_has_s1pie(kvm))
+ res0 |= TCR2_EL2_PIE;
+ if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
+ res0 |= (TCR2_EL2_E0POE | TCR2_EL2_D128 |
+ TCR2_EL2_AMEC1 | TCR2_EL2_DisCH0 | TCR2_EL2_DisCH1);
+ set_sysreg_masks(kvm, TCR2_EL2, res0, res1);
+
/* SCTLR_EL1 */
res0 = SCTLR_EL1_RES0;
res1 = SCTLR_EL1_RES1;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index ff8c4e1b847e..4f4fe84fb0e8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -110,6 +110,14 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
PURE_EL2_SYSREG( RVBAR_EL2 );
PURE_EL2_SYSREG( TPIDR_EL2 );
PURE_EL2_SYSREG( HPFAR_EL2 );
+ PURE_EL2_SYSREG( HCRX_EL2 );
+ PURE_EL2_SYSREG( HFGRTR_EL2 );
+ PURE_EL2_SYSREG( HFGWTR_EL2 );
+ PURE_EL2_SYSREG( HFGITR_EL2 );
+ PURE_EL2_SYSREG( HDFGRTR_EL2 );
+ PURE_EL2_SYSREG( HDFGWTR_EL2 );
+ PURE_EL2_SYSREG( HAFGRTR_EL2 );
+ PURE_EL2_SYSREG( CNTVOFF_EL2 );
PURE_EL2_SYSREG( CNTHCTL_EL2 );
MAPPED_EL2_SYSREG(SCTLR_EL2, SCTLR_EL1,
translate_sctlr_el2_to_sctlr_el1 );
@@ -126,10 +134,15 @@ static bool get_el2_to_el1_mapping(unsigned int reg,
MAPPED_EL2_SYSREG(ESR_EL2, ESR_EL1, NULL );
MAPPED_EL2_SYSREG(FAR_EL2, FAR_EL1, NULL );
MAPPED_EL2_SYSREG(MAIR_EL2, MAIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(TCR2_EL2, TCR2_EL1, NULL );
+ MAPPED_EL2_SYSREG(PIR_EL2, PIR_EL1, NULL );
+ MAPPED_EL2_SYSREG(PIRE0_EL2, PIRE0_EL1, NULL );
+ MAPPED_EL2_SYSREG(POR_EL2, POR_EL1, NULL );
MAPPED_EL2_SYSREG(AMAIR_EL2, AMAIR_EL1, NULL );
MAPPED_EL2_SYSREG(ELR_EL2, ELR_EL1, NULL );
MAPPED_EL2_SYSREG(SPSR_EL2, SPSR_EL1, NULL );
MAPPED_EL2_SYSREG(ZCR_EL2, ZCR_EL1, NULL );
+ MAPPED_EL2_SYSREG(CONTEXTIDR_EL2, CONTEXTIDR_EL1, NULL );
default:
return false;
}
@@ -149,6 +162,21 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
goto memory_read;
/*
+ * CNTHCTL_EL2 requires some special treatment to
+ * account for the bits that can be set via CNTKCTL_EL1.
+ */
+ switch (reg) {
+ case CNTHCTL_EL2:
+ if (vcpu_el2_e2h_is_set(vcpu)) {
+ val = read_sysreg_el1(SYS_CNTKCTL);
+ val &= CNTKCTL_VALID_BITS;
+ val |= __vcpu_sys_reg(vcpu, reg) & ~CNTKCTL_VALID_BITS;
+ return val;
+ }
+ break;
+ }
+
+ /*
* If this register does not have an EL1 counterpart,
* then read the stored EL2 version.
*/
@@ -165,6 +193,9 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
/* Get the current version of the EL1 counterpart. */
WARN_ON(!__vcpu_read_sys_reg_from_cpu(el1r, &val));
+ if (reg >= __SANITISED_REG_START__)
+ val = kvm_vcpu_apply_reg_masks(vcpu, reg, val);
+
return val;
}
@@ -198,6 +229,19 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
*/
__vcpu_sys_reg(vcpu, reg) = val;
+ switch (reg) {
+ case CNTHCTL_EL2:
+ /*
+ * If E2H=0, CNHTCTL_EL2 is a pure shadow register.
+ * Otherwise, some of the bits are backed by
+ * CNTKCTL_EL1, while the rest is kept in memory.
+ * Yes, this is fun stuff.
+ */
+ if (vcpu_el2_e2h_is_set(vcpu))
+ write_sysreg_el1(val, SYS_CNTKCTL);
+ return;
+ }
+
/* No EL1 counterpart? We're done here.? */
if (reg == el1r)
return;
@@ -390,10 +434,6 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu,
bool was_enabled = vcpu_has_cache_enabled(vcpu);
u64 val, mask, shift;
- if (reg_to_encoding(r) == SYS_TCR2_EL1 &&
- !kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
- return undef_access(vcpu, p, r);
-
BUG_ON(!p->is_write);
get_access_mask(r, &mask, &shift);
@@ -2104,6 +2144,15 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu,
.val = v, \
}
+#define EL2_REG_FILTERED(name, acc, rst, v, filter) { \
+ SYS_DESC(SYS_##name), \
+ .access = acc, \
+ .reset = rst, \
+ .reg = name, \
+ .visibility = filter, \
+ .val = v, \
+}
+
#define EL2_REG_VNCR(name, rst, v) EL2_REG(name, bad_vncr_trap, rst, v)
#define EL2_REG_REDIR(name, rst, v) EL2_REG(name, bad_redir_trap, rst, v)
@@ -2236,16 +2285,18 @@ static u64 reset_hcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
return __vcpu_sys_reg(vcpu, r->reg) = val;
}
+static unsigned int __el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd,
+ unsigned int (*fn)(const struct kvm_vcpu *,
+ const struct sys_reg_desc *))
+{
+ return el2_visibility(vcpu, rd) ?: fn(vcpu, rd);
+}
+
static unsigned int sve_el2_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- unsigned int r;
-
- r = el2_visibility(vcpu, rd);
- if (r)
- return r;
-
- return sve_visibility(vcpu, rd);
+ return __el2_visibility(vcpu, rd, sve_visibility);
}
static bool access_zcr_el2(struct kvm_vcpu *vcpu,
@@ -2273,12 +2324,48 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
static unsigned int s1poe_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
- if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ if (kvm_has_s1poe(vcpu->kvm))
return 0;
return REG_HIDDEN;
}
+static unsigned int s1poe_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, s1poe_visibility);
+}
+
+static unsigned int tcr2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_tcr2(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int tcr2_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, tcr2_visibility);
+}
+
+static unsigned int s1pie_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ if (kvm_has_s1pie(vcpu->kvm))
+ return 0;
+
+ return REG_HIDDEN;
+}
+
+static unsigned int s1pie_el2_visibility(const struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *rd)
+{
+ return __el2_visibility(vcpu, rd, s1pie_visibility);
+}
+
/*
* Architected system registers.
* Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
@@ -2489,7 +2576,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
- { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0 },
+ { SYS_DESC(SYS_TCR2_EL1), access_vm_reg, reset_val, TCR2_EL1, 0,
+ .visibility = tcr2_visibility },
PTRAUTH_KEY(APIA),
PTRAUTH_KEY(APIB),
@@ -2543,8 +2631,10 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 },
- { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1 },
- { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1 },
+ { SYS_DESC(SYS_PIRE0_EL1), NULL, reset_unknown, PIRE0_EL1,
+ .visibility = s1pie_visibility },
+ { SYS_DESC(SYS_PIR_EL1), NULL, reset_unknown, PIR_EL1,
+ .visibility = s1pie_visibility },
{ SYS_DESC(SYS_POR_EL1), NULL, reset_unknown, POR_EL1,
.visibility = s1poe_visibility },
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
@@ -2818,14 +2908,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG_VNCR(HFGITR_EL2, reset_val, 0),
EL2_REG_VNCR(HACR_EL2, reset_val, 0),
- { SYS_DESC(SYS_ZCR_EL2), .access = access_zcr_el2, .reset = reset_val,
- .visibility = sve_el2_visibility, .reg = ZCR_EL2 },
+ EL2_REG_FILTERED(ZCR_EL2, access_zcr_el2, reset_val, 0,
+ sve_el2_visibility),
EL2_REG_VNCR(HCRX_EL2, reset_val, 0),
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
+ EL2_REG_FILTERED(TCR2_EL2, access_rw, reset_val, TCR2_EL2_RES1,
+ tcr2_el2_visibility),
EL2_REG_VNCR(VTTBR_EL2, reset_val, 0),
EL2_REG_VNCR(VTCR_EL2, reset_val, 0),
@@ -2853,6 +2945,12 @@ static const struct sys_reg_desc sys_reg_descs[] = {
EL2_REG(HPFAR_EL2, access_rw, reset_val, 0),
EL2_REG(MAIR_EL2, access_rw, reset_val, 0),
+ EL2_REG_FILTERED(PIRE0_EL2, access_rw, reset_val, 0,
+ s1pie_el2_visibility),
+ EL2_REG_FILTERED(PIR_EL2, access_rw, reset_val, 0,
+ s1pie_el2_visibility),
+ EL2_REG_FILTERED(POR_EL2, access_rw, reset_val, 0,
+ s1poe_el2_visibility),
EL2_REG(AMAIR_EL2, access_rw, reset_val, 0),
EL2_REG(VBAR_EL2, access_rw, reset_val, 0),
@@ -4719,7 +4817,7 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
if (kvm_has_feat(kvm, ID_AA64ISAR2_EL1, MOPS, IMP))
vcpu->arch.hcrx_el2 |= (HCRX_EL2_MSCEn | HCRX_EL2_MCE2);
- if (kvm_has_feat(kvm, ID_AA64MMFR3_EL1, TCRX, IMP))
+ if (kvm_has_tcr2(kvm))
vcpu->arch.hcrx_el2 |= HCRX_EL2_TCR2En;
if (kvm_has_fpmr(kvm))
@@ -4769,11 +4867,11 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu)
kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP |
HFGITR_EL2_ATS1E1WP);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP))
+ if (!kvm_has_s1pie(kvm))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 |
HFGxTR_EL2_nPIR_EL1);
- if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1POE, IMP))
+ if (!kvm_has_s1poe(kvm))
kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPOR_EL1 |
HFGxTR_EL2_nPOR_EL0);
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 8d637ac4b7c6..a33136243bdf 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -1688,6 +1688,7 @@ UnsignedEnum 3:0 HAFDBS
0b0000 NI
0b0001 AF
0b0010 DBM
+ 0b0011 HAFT
EndEnum
EndSysreg
@@ -2819,8 +2820,7 @@ Field 13 AMEC1
Field 12 AMEC0
Field 11 HAFT
Field 10 PTTWI
-Field 9:8 SKL1
-Field 7:6 SKL0
+Res0 9:6
Field 5 D128
Field 4 AIE
Field 3 POE
@@ -2883,6 +2883,10 @@ Sysreg PIRE0_EL12 3 5 10 2 2
Fields PIRx_ELx
EndSysreg
+Sysreg PIRE0_EL2 3 4 10 2 2
+Fields PIRx_ELx
+EndSysreg
+
Sysreg PIR_EL1 3 0 10 2 3
Fields PIRx_ELx
EndSysreg
@@ -2903,6 +2907,10 @@ Sysreg POR_EL1 3 0 10 2 4
Fields PIRx_ELx
EndSysreg
+Sysreg POR_EL2 3 4 10 2 4
+Fields PIRx_ELx
+EndSysreg
+
Sysreg POR_EL12 3 5 10 2 4
Fields PIRx_ELx
EndSysreg
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index c819c5d16613..fd650a8789b9 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -147,6 +147,9 @@ u64 timer_get_cval(struct arch_timer_context *ctxt);
void kvm_timer_cpu_up(void);
void kvm_timer_cpu_down(void);
+/* CNTKCTL_EL1 valid bits as of DDI0487J.a */
+#define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0))
+
static inline bool has_cntpoff(void)
{
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));