aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorMichael Ellerman <mpe@ellerman.id.au>2021-06-23 00:19:08 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2021-06-23 00:19:08 +1000
commita736143afd036f2078fe19435b16fd55abc789a9 (patch)
tree3ae0c967f9e7a25f928badfb48c2d36b6402c76a
parentpowerpc/boot: Add a boot wrapper for Microwatt (diff)
parentKVM: PPC: Book3S HV: Workaround high stack usage with clang (diff)
downloadwireguard-linux-a736143afd036f2078fe19435b16fd55abc789a9.tar.xz
wireguard-linux-a736143afd036f2078fe19435b16fd55abc789a9.zip
Merge branch 'topic/ppc-kvm' into next
Pull in some more ppc KVM patches we are keeping in our topic branch. In particular this brings in the series to add H_RPT_INVALIDATE.
-rw-r--r--Documentation/virt/kvm/api.rst18
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h4
-rw-r--r--arch/powerpc/include/asm/cputhreads.h30
-rw-r--r--arch/powerpc/include/asm/hvcall.h4
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h3
-rw-r--r--arch/powerpc/include/asm/mmu_context.h12
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c27
-rw-r--r--arch/powerpc/kvm/book3s_hv.c102
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c122
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c3
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c5
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c178
-rw-r--r--include/uapi/linux/kvm.h1
16 files changed, 492 insertions, 22 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 7fcb2fd38f42..9977e845633f 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6362,6 +6362,24 @@ default.
See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
+7.26 KVM_CAP_PPC_RPT_INVALIDATE
+-------------------------------
+
+:Capability: KVM_CAP_PPC_RPT_INVALIDATE
+:Architectures: ppc
+:Type: vm
+
+This capability indicates that the kernel is capable of handling
+H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is enabled for hypervisors on platforms like POWER9
+that support radix MMU.
+
8. Other capabilities.
======================
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index eace8c3f7b0a..c02f42d1031e 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -19,6 +19,7 @@ struct mmu_psize_def {
int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
unsigned int tlbiel; /* tlbiel supported for that page size */
unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
+ unsigned long h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */
union {
unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
unsigned long ap; /* Ap encoding used by PowerISA 3.0 */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 8b33601cdb9d..a46fd37ad552 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
#include <asm/hvcall.h>
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
struct vm_area_struct;
struct mm_struct;
struct mmu_gather;
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 98c8bd155bf9..b167186aaee4 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu)
return cpu | (threads_per_core - 1);
}
+/*
+ * tlb_thread_siblings are siblings which share a TLB. This is not
+ * architected, is not something a hypervisor could emulate and a future
+ * CPU may change behaviour even in compat mode, so this should only be
+ * used on PowerNV, and only with care.
+ */
+static inline int cpu_first_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu & ~0x6; /* Big Core */
+ else
+ return cpu_first_thread_sibling(cpu);
+}
+
+static inline int cpu_last_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu | 0x6; /* Big Core */
+ else
+ return cpu_last_thread_sibling(cpu);
+}
+
+static inline int cpu_tlb_thread_sibling_step(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return 2; /* Big Core */
+ else
+ return 1;
+}
+
static inline u32 get_tensr(void)
{
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 2a6346b6472f..9bcf345cb208 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -423,9 +423,9 @@
#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */
#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */
#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */
-/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */
#define H_RPTI_TYPE_PRT 0x0008
-/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */
#define H_RPTI_TYPE_PAT 0x0008
#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
H_RPTI_TYPE_PRT)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index e6b53c6e21e3..caaa0f592d8e 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -307,6 +307,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
void kvmhv_release_all_nested(struct kvm *kvm);
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
u64 time_limit, unsigned long lpcr);
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 1450ddf95691..9ba6b585337f 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -215,6 +215,18 @@ static inline void mm_context_add_copro(struct mm_struct *mm) { }
static inline void mm_context_remove_copro(struct mm_struct *mm) { }
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
+#else
+static inline void do_h_rpt_invalidate_prt(unsigned long pid,
+ unsigned long lpid,
+ unsigned long type,
+ unsigned long pg_sizes,
+ unsigned long start,
+ unsigned long end) { }
+#endif
extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index d909c069363e..b5905ae4377c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
#include <asm/pte-walk.h>
#include <asm/ultravisor.h>
#include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
/*
* Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
}
psi = shift_to_mmu_psize(pshift);
- rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
- lpid, rb);
+
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+ rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+ lpid, rb);
+ } else {
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_TLB,
+ psize_to_rpti_pgsize(psi),
+ addr, addr + psize);
+ }
+
if (rc)
pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
}
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
return;
}
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
- lpid, TLBIEL_INVAL_SET_LPID);
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ else
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+ 0, -1UL);
if (rc)
pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f4dc4f0c34b5..279eae8f9dbc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -76,6 +76,7 @@
#include <asm/kvm_book3s_uvmem.h>
#include <asm/ultravisor.h>
#include <asm/dtl.h>
+#include <asm/plpar_wrappers.h>
#include "book3s.h"
@@ -922,6 +923,68 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
return yield_count;
}
+/*
+ * H_RPT_INVALIDATE hcall handler for nested guests.
+ *
+ * Handles only nested process-scoped invalidation requests in L0.
+ */
+static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu)
+{
+ unsigned long type = kvmppc_get_gpr(vcpu, 6);
+ unsigned long pid, pg_sizes, start, end;
+
+ /*
+ * The partition-scoped invalidations aren't handled here in L0.
+ */
+ if (type & H_RPTI_TYPE_NESTED)
+ return RESUME_HOST;
+
+ pid = kvmppc_get_gpr(vcpu, 4);
+ pg_sizes = kvmppc_get_gpr(vcpu, 7);
+ start = kvmppc_get_gpr(vcpu, 8);
+ end = kvmppc_get_gpr(vcpu, 9);
+
+ do_h_rpt_invalidate_prt(pid, vcpu->arch.nested->shadow_lpid,
+ type, pg_sizes, start, end);
+
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ return RESUME_GUEST;
+}
+
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+ unsigned long id, unsigned long target,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ if (!kvm_is_radix(vcpu->kvm))
+ return H_UNSUPPORTED;
+
+ if (end < start)
+ return H_P5;
+
+ /*
+ * Partition-scoped invalidation for nested guests.
+ */
+ if (type & H_RPTI_TYPE_NESTED) {
+ if (!nesting_enabled(vcpu->kvm))
+ return H_FUNCTION;
+
+ /* Support only cores as target */
+ if (target != H_RPTI_TARGET_CMMU)
+ return H_P2;
+
+ return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes,
+ start, end);
+ }
+
+ /*
+ * Process-scoped invalidation for L1 guests.
+ */
+ do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid,
+ type, pg_sizes, start, end);
+ return H_SUCCESS;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
@@ -1105,6 +1168,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
ret = H_HARDWARE;
break;
+ case H_RPT_INVALIDATE:
+ ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7),
+ kvmppc_get_gpr(vcpu, 8),
+ kvmppc_get_gpr(vcpu, 9));
+ break;
case H_SET_PARTITION_TABLE:
ret = H_FUNCTION;
@@ -1225,6 +1296,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_XIRR_X:
#endif
case H_PAGE_INIT:
+ case H_RPT_INVALIDATE:
return 1;
}
@@ -1748,6 +1820,23 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
if (!xics_on_xive())
kvmppc_xics_rm_complete(vcpu, 0);
break;
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /*
+ * The H_RPT_INVALIDATE hcalls issued by nested
+ * guests for process-scoped invalidations when
+ * GTSE=0, are handled here in L0.
+ */
+ if (req == H_RPT_INVALIDATE) {
+ r = kvmppc_nested_h_rpt_invalidate(vcpu);
+ break;
+ }
+
+ r = RESUME_HOST;
+ break;
+ }
default:
r = RESUME_HOST;
break;
@@ -2820,7 +2909,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
cpumask_t *cpu_in_guest;
int i;
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
if (nested) {
cpumask_set_cpu(cpu, &nested->need_tlb_flush);
cpu_in_guest = &nested->cpu_in_guest;
@@ -2834,9 +2923,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
* the other side is the first smp_mb() in kvmppc_run_core().
*/
smp_mb();
- for (i = 0; i < threads_per_core; ++i)
- if (cpumask_test_cpu(cpu + i, cpu_in_guest))
- smp_call_function_single(cpu + i, do_nothing, NULL, 1);
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step())
+ if (cpumask_test_cpu(i, cpu_in_guest))
+ smp_call_function_single(i, do_nothing, NULL, 1);
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
@@ -2867,8 +2957,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
*/
if (prev_cpu != pcpu) {
if (prev_cpu >= 0 &&
- cpu_first_thread_sibling(prev_cpu) !=
- cpu_first_thread_sibling(pcpu))
+ cpu_first_tlb_thread_sibling(prev_cpu) !=
+ cpu_first_tlb_thread_sibling(pcpu))
radix_flush_cpu(kvm, prev_cpu, vcpu);
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 259492bb4153..be8ef1c5b1bf 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -721,7 +721,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
* Thus we make all 4 threads use the same bit.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu = cpu_first_thread_sibling(pcpu);
+ pcpu = cpu_first_tlb_thread_sibling(pcpu);
if (nested)
need_tlb_flush = &nested->need_tlb_flush;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 60724f674421..8543ad538b0c 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
static struct patb_entry *pseries_partition_tb;
@@ -53,7 +54,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
hr->dawrx1 = vcpu->arch.dawrx1;
}
-static void byteswap_pt_regs(struct pt_regs *regs)
+/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
{
unsigned long *addr = (unsigned long *) regs;
@@ -467,8 +469,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
return;
}
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
- lpid, TLBIEL_INVAL_SET_LPID);
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ else
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PAT,
+ H_RPTI_PAGE_ALL, 0, -1UL);
if (rc)
pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}
@@ -1214,6 +1223,113 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
return H_SUCCESS;
}
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+ unsigned long lpid, unsigned long ric)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *gp;
+
+ gp = kvmhv_get_nested(kvm, lpid, false);
+ if (gp) {
+ kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+ kvmhv_put_nested(gp);
+ }
+ return H_SUCCESS;
+}
+
+/*
+ * Number of pages above which we invalidate the entire LPID rather than
+ * flush individual pages.
+ */
+static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
+
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+ unsigned long lpid,
+ unsigned long pg_sizes,
+ unsigned long start,
+ unsigned long end)
+{
+ int ret = H_P4;
+ unsigned long addr, nr_pages;
+ struct mmu_psize_def *def;
+ unsigned long psize, ap, page_size;
+ bool flush_lpid;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
+ if (flush_lpid)
+ return do_tlb_invalidate_nested_all(vcpu, lpid,
+ RIC_FLUSH_TLB);
+ addr = start;
+ ap = mmu_get_ap(psize);
+ page_size = 1UL << def->shift;
+ do {
+ ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+ get_epn(addr));
+ if (ret)
+ return H_P4;
+ addr += page_size;
+ } while (addr < end);
+ }
+ return ret;
+}
+
+/*
+ * Performs partition-scoped invalidations for nested guests
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ /*
+ * If L2 lpid isn't valid, we need to return H_PARAMETER.
+ *
+ * However, nested KVM issues a L2 lpid flush call when creating
+ * partition table entries for L2. This happens even before the
+ * corresponding shadow lpid is created in HV which happens in
+ * H_ENTER_NESTED call. Since we can't differentiate this case from
+ * the invalid case, we ignore such flush requests and return success.
+ */
+ if (!kvmhv_find_nested(vcpu->kvm, lpid))
+ return H_SUCCESS;
+
+ /*
+ * A flush all request can be handled by a full lpid flush only.
+ */
+ if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+ return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
+
+ /*
+ * We don't need to handle a PWC flush like process table here,
+ * because intermediate partition scoped table in nested guest doesn't
+ * really have PWC. Only level we have PWC is in L0 and for nested
+ * invalidate at L0 we always do kvm_flush_lpid() which does
+ * radix__flush_all_lpid(). For range invalidate at any level, we
+ * are not removing the higher level page tables and hence there is
+ * no PWC invalidate needed.
+ *
+ * if (type & H_RPTI_TYPE_PWC) {
+ * ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
+ * if (ret)
+ * return H_P4;
+ * }
+ */
+
+ if (start == 0 && end == -1)
+ return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
+
+ if (type & H_RPTI_TYPE_TLB)
+ return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
+ start, end);
+ return H_SUCCESS;
+}
+
/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
struct kvm_nested_guest *gp,
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7b23fe6e9ca0..632b2545072b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -60,7 +60,7 @@ static int global_invalidates(struct kvm *kvm)
* so use the bit for the first thread to represent the core.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a2a68a958fa0..be33b5321a76 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -682,6 +682,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
!kvmppc_hv_ops->enable_dawr1(NULL));
break;
+ case KVM_CAP_PPC_RPT_INVALIDATE:
+ r = 1;
+ break;
#endif
default:
r = 0;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index fe236c38ce00..6e3495221ab7 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -475,6 +475,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
def = &mmu_psize_defs[idx];
def->shift = shift;
def->ap = ap;
+ def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
}
/* needed ? */
@@ -549,9 +550,13 @@ void __init radix__early_init_devtree(void)
*/
mmu_psize_defs[MMU_PAGE_4K].shift = 12;
mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+ mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_4K);
mmu_psize_defs[MMU_PAGE_64K].shift = 16;
mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+ mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_64K);
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index b827e2a96da0..ae12fb5559cb 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -20,10 +20,6 @@
#include "internal.h"
-#define RIC_FLUSH_TLB 0
-#define RIC_FLUSH_PWC 1
-#define RIC_FLUSH_ALL 2
-
/*
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
@@ -130,6 +126,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+ unsigned long lpid,
+ unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -190,6 +201,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap, unsigned long ric)
{
@@ -235,6 +263,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
}
}
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+ unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
@@ -254,6 +298,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
}
}
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+ RIC_FLUSH_TLB);
+ }
+}
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
@@ -352,6 +415,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+ unsigned long ric)
+{
+ asm volatile("ptesync" : : : "memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
struct tlbiel_pid {
unsigned long pid;
unsigned long ric;
@@ -477,6 +565,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -557,6 +659,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync" : : : "memory");
+ if (also_pwc)
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
@@ -1344,3 +1458,59 @@ void radix__flush_tlb_all(void)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ unsigned long psize, nr_pages;
+ struct mmu_psize_def *def;
+ bool flush_pid;
+
+ /*
+ * A H_RPTI_TYPE_ALL request implies RIC=3, hence
+ * do a single IS=1 based flush.
+ */
+ if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ return;
+ }
+
+ if (type & H_RPTI_TYPE_PWC)
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+
+ /* Full PID flush */
+ if (start == 0 && end == -1)
+ return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+ /* Do range invalidation for all the valid page sizes */
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+ /*
+ * If the number of pages spanning the range is above
+ * the ceiling, convert the request into a full PID flush.
+ * And since PID flush takes out all the page sizes, there
+ * is no need to consider remaining page sizes.
+ */
+ if (flush_pid) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ return;
+ }
+ _tlbie_va_range_lpid(start, end, pid, lpid,
+ (1UL << def->shift), psize, false);
+ }
+}
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
+
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3fd9a7e9d90c..613198a94c43 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1082,6 +1082,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_SGX_ATTRIBUTE 196
#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
#define KVM_CAP_PTP_KVM 198
+#define KVM_CAP_PPC_RPT_INVALIDATE 199
#ifdef KVM_CAP_IRQ_ROUTING