aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010si-post.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/p2041si-post.dtsi16
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h3
-rw-r--r--arch/powerpc/include/asm/atomic.h140
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h4
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h30
-rw-r--r--arch/powerpc/include/asm/cputhreads.h30
-rw-r--r--arch/powerpc/include/asm/exception-64s.h13
-rw-r--r--arch/powerpc/include/asm/hvcall.h7
-rw-r--r--arch/powerpc/include/asm/interrupt.h9
-rw-r--r--arch/powerpc/include/asm/irq.h5
-rw-r--r--arch/powerpc/include/asm/jump_label.h2
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h3
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h8
-rw-r--r--arch/powerpc/include/asm/kvm_host.h22
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h18
-rw-r--r--arch/powerpc/include/asm/mmu_context.h18
-rw-r--r--arch/powerpc/include/asm/mmzone.h4
-rw-r--r--arch/powerpc/include/asm/paravirt.h22
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h6
-rw-r--r--arch/powerpc/include/asm/pte-walk.h29
-rw-r--r--arch/powerpc/include/asm/ptrace.h45
-rw-r--r--arch/powerpc/include/asm/qspinlock.h2
-rw-r--r--arch/powerpc/include/asm/syscall.h42
-rw-r--r--arch/powerpc/include/asm/time.h12
-rw-r--r--arch/powerpc/include/asm/uaccess.h2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/eeh.c23
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S38
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S250
-rw-r--r--arch/powerpc/kernel/interrupt.c4
-rw-r--r--arch/powerpc/kernel/io-workarounds.c16
-rw-r--r--arch/powerpc/kernel/iommu.c11
-rw-r--r--arch/powerpc/kernel/kprobes.c21
-rw-r--r--arch/powerpc/kernel/legacy_serial.c7
-rw-r--r--arch/powerpc/kernel/mce.c1
-rw-r--r--arch/powerpc/kernel/process.c4
-rw-r--r--arch/powerpc/kernel/security.c5
-rw-r--r--arch/powerpc/kernel/setup_64.c6
-rw-r--r--arch/powerpc/kernel/signal.h4
-rw-r--r--arch/powerpc/kernel/signal_64.c9
-rw-r--r--arch/powerpc/kernel/smp.c3
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kernel/time.c10
-rw-r--r--arch/powerpc/kexec/core.c4
-rw-r--r--arch/powerpc/kvm/Makefile4
-rw-r--r--arch/powerpc/kvm/book3s.c108
-rw-r--r--arch/powerpc/kvm/book3s_64_entry.S416
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c27
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c12
-rw-r--r--arch/powerpc/kvm/book3s_hv.c817
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c137
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c122
-rw-r--r--arch/powerpc/kvm/book3s_hv_p9_entry.c508
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c29
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c15
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S689
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c3
-rw-r--r--arch/powerpc/kvm/book3s_pr.c2
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c2
-rw-r--r--arch/powerpc/kvm/book3s_segment.S3
-rw-r--r--arch/powerpc/kvm/book3s_xive.c114
-rw-r--r--arch/powerpc/kvm/book3s_xive.h7
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c11
-rw-r--r--arch/powerpc/kvm/booke.c76
-rw-r--r--arch/powerpc/kvm/powerpc.c3
-rw-r--r--arch/powerpc/lib/feature-fixups.c114
-rw-r--r--arch/powerpc/mm/Makefile2
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c33
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c202
-rw-r--r--arch/powerpc/mm/mem.c5
-rw-r--r--arch/powerpc/mm/mmu_context.c4
-rw-r--r--arch/powerpc/perf/core-book3s.c2
-rw-r--r--arch/powerpc/platforms/cell/Kconfig1
-rw-r--r--arch/powerpc/platforms/cell/pmu.c1
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c1
-rw-r--r--arch/powerpc/platforms/powermac/Kconfig1
-rw-r--r--arch/powerpc/platforms/powernv/idle.c52
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig1
-rw-r--r--arch/powerpc/platforms/ps3/interrupt.c5
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S10
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c1
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c29
-rw-r--r--arch/powerpc/sysdev/ehv_pic.c1
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_err.c1
-rw-r--r--arch/powerpc/sysdev/i8259.c3
-rw-r--r--arch/powerpc/sysdev/mpic.c2
-rw-r--r--arch/powerpc/sysdev/tsi108_pci.c3
-rw-r--r--arch/powerpc/sysdev/xics/icp-hv.c1
-rw-r--r--arch/powerpc/sysdev/xics/icp-opal.c1
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c2
-rw-r--r--arch/powerpc/sysdev/xive/Kconfig1
-rw-r--r--arch/powerpc/xmon/xmon.c13
98 files changed, 2623 insertions, 1908 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 088dd2afcfe4..14b132cf95e2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -671,7 +671,7 @@ config NODES_SHIFT
int
default "8" if PPC64
default "4"
- depends on NEED_MULTIPLE_NODES
+ depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
def_bool y
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index c2717f31925a..ccda0a91abf0 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -122,7 +122,15 @@
};
/include/ "pq3-i2c-0.dtsi"
+ i2c@3000 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "pq3-i2c-1.dtsi"
+ i2c@3100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "pq3-duart-0.dtsi"
/include/ "pq3-espi-0.dtsi"
spi0: spi@7000 {
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index 872e4485dc3f..ddc018d42252 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -371,7 +371,23 @@
};
/include/ "qoriq-i2c-0.dtsi"
+ i2c@118000 {
+ fsl,i2c-erratum-a004447;
+ };
+
+ i2c@118100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "qoriq-i2c-1.dtsi"
+ i2c@119000 {
+ fsl,i2c-erratum-a004447;
+ };
+
+ i2c@119100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "qoriq-duart-0.dtsi"
/include/ "qoriq-duart-1.dtsi"
/include/ "qoriq-gpio-0.dtsi"
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1c7b75834e04..02ee6f5ac9fe 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -120,6 +120,7 @@ extern s32 patch__call_flush_branch_caches3;
extern s32 patch__flush_count_cache_return;
extern s32 patch__flush_link_stack_return;
extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_branch_caches;
@@ -140,7 +141,7 @@ void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 61c6e8b200e8..a1732a79e92a 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -23,7 +23,7 @@
#define __atomic_release_fence() \
__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
-static __inline__ int atomic_read(const atomic_t *v)
+static __inline__ int arch_atomic_read(const atomic_t *v)
{
int t;
@@ -32,13 +32,13 @@ static __inline__ int atomic_read(const atomic_t *v)
return t;
}
-static __inline__ void atomic_set(atomic_t *v, int i)
+static __inline__ void arch_atomic_set(atomic_t *v, int i)
{
__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
}
#define ATOMIC_OP(op, asm_op) \
-static __inline__ void atomic_##op(int a, atomic_t *v) \
+static __inline__ void arch_atomic_##op(int a, atomic_t *v) \
{ \
int t; \
\
@@ -53,7 +53,7 @@ static __inline__ void atomic_##op(int a, atomic_t *v) \
} \
#define ATOMIC_OP_RETURN_RELAXED(op, asm_op) \
-static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
+static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \
{ \
int t; \
\
@@ -70,7 +70,7 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v) \
}
#define ATOMIC_FETCH_OP_RELAXED(op, asm_op) \
-static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
+static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
{ \
int res, t; \
\
@@ -94,11 +94,11 @@ static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
ATOMIC_OPS(add, add)
ATOMIC_OPS(sub, subf)
-#define atomic_add_return_relaxed atomic_add_return_relaxed
-#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
-#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
#undef ATOMIC_OPS
#define ATOMIC_OPS(op, asm_op) \
@@ -109,16 +109,16 @@ ATOMIC_OPS(and, and)
ATOMIC_OPS(or, or)
ATOMIC_OPS(xor, xor)
-#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
#undef ATOMIC_OPS
#undef ATOMIC_FETCH_OP_RELAXED
#undef ATOMIC_OP_RETURN_RELAXED
#undef ATOMIC_OP
-static __inline__ void atomic_inc(atomic_t *v)
+static __inline__ void arch_atomic_inc(atomic_t *v)
{
int t;
@@ -131,9 +131,9 @@ static __inline__ void atomic_inc(atomic_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic_inc atomic_inc
+#define arch_atomic_inc arch_atomic_inc
-static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
+static __inline__ int arch_atomic_inc_return_relaxed(atomic_t *v)
{
int t;
@@ -149,7 +149,7 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
return t;
}
-static __inline__ void atomic_dec(atomic_t *v)
+static __inline__ void arch_atomic_dec(atomic_t *v)
{
int t;
@@ -162,9 +162,9 @@ static __inline__ void atomic_dec(atomic_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic_dec atomic_dec
+#define arch_atomic_dec arch_atomic_dec
-static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
+static __inline__ int arch_atomic_dec_return_relaxed(atomic_t *v)
{
int t;
@@ -180,17 +180,20 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
return t;
}
-#define atomic_inc_return_relaxed atomic_inc_return_relaxed
-#define atomic_dec_return_relaxed atomic_dec_return_relaxed
+#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed
+#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_cmpxchg_relaxed(v, o, n) \
- cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic_cmpxchg_acquire(v, o, n) \
- cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg(v, o, n) \
+ (arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_cmpxchg_relaxed(v, o, n) \
+ arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg_acquire(v, o, n) \
+ arch_cmpxchg_acquire(&((v)->counter), (o), (n))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#define arch_atomic_xchg(v, new) \
+ (arch_xchg(&((v)->counter), new))
+#define arch_atomic_xchg_relaxed(v, new) \
+ arch_xchg_relaxed(&((v)->counter), (new))
/*
* Don't want to override the generic atomic_try_cmpxchg_acquire, because
@@ -199,7 +202,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
* would be a surprise).
*/
static __always_inline bool
-atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
+arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
{
int r, o = *old;
@@ -229,7 +232,7 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int t;
@@ -250,7 +253,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
return t;
}
-#define atomic_fetch_add_unless atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
/**
* atomic_inc_not_zero - increment unless the number is zero
@@ -259,7 +262,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
* Atomically increments @v by 1, so long as @v is non-zero.
* Returns non-zero if @v was non-zero, and zero otherwise.
*/
-static __inline__ int atomic_inc_not_zero(atomic_t *v)
+static __inline__ int arch_atomic_inc_not_zero(atomic_t *v)
{
int t1, t2;
@@ -280,14 +283,14 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v)
return t1;
}
-#define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
+#define arch_atomic_inc_not_zero(v) arch_atomic_inc_not_zero((v))
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
-static __inline__ int atomic_dec_if_positive(atomic_t *v)
+static __inline__ int arch_atomic_dec_if_positive(atomic_t *v)
{
int t;
@@ -307,13 +310,13 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
return t;
}
-#define atomic_dec_if_positive atomic_dec_if_positive
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
#ifdef __powerpc64__
#define ATOMIC64_INIT(i) { (i) }
-static __inline__ s64 atomic64_read(const atomic64_t *v)
+static __inline__ s64 arch_atomic64_read(const atomic64_t *v)
{
s64 t;
@@ -322,13 +325,13 @@ static __inline__ s64 atomic64_read(const atomic64_t *v)
return t;
}
-static __inline__ void atomic64_set(atomic64_t *v, s64 i)
+static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i)
{
__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
}
#define ATOMIC64_OP(op, asm_op) \
-static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
+static __inline__ void arch_atomic64_##op(s64 a, atomic64_t *v) \
{ \
s64 t; \
\
@@ -344,7 +347,7 @@ static __inline__ void atomic64_##op(s64 a, atomic64_t *v) \
#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
static inline s64 \
-atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
+arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
{ \
s64 t; \
\
@@ -362,7 +365,7 @@ atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
static inline s64 \
-atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
+arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
{ \
s64 res, t; \
\
@@ -386,11 +389,11 @@ atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
ATOMIC64_OPS(add, add)
ATOMIC64_OPS(sub, subf)
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
#undef ATOMIC64_OPS
#define ATOMIC64_OPS(op, asm_op) \
@@ -401,16 +404,16 @@ ATOMIC64_OPS(and, and)
ATOMIC64_OPS(or, or)
ATOMIC64_OPS(xor, xor)
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
#undef ATOPIC64_OPS
#undef ATOMIC64_FETCH_OP_RELAXED
#undef ATOMIC64_OP_RETURN_RELAXED
#undef ATOMIC64_OP
-static __inline__ void atomic64_inc(atomic64_t *v)
+static __inline__ void arch_atomic64_inc(atomic64_t *v)
{
s64 t;
@@ -423,9 +426,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic64_inc atomic64_inc
+#define arch_atomic64_inc arch_atomic64_inc
-static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
+static __inline__ s64 arch_atomic64_inc_return_relaxed(atomic64_t *v)
{
s64 t;
@@ -441,7 +444,7 @@ static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
return t;
}
-static __inline__ void atomic64_dec(atomic64_t *v)
+static __inline__ void arch_atomic64_dec(atomic64_t *v)
{
s64 t;
@@ -454,9 +457,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
-#define atomic64_dec atomic64_dec
+#define arch_atomic64_dec arch_atomic64_dec
-static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_return_relaxed(atomic64_t *v)
{
s64 t;
@@ -472,14 +475,14 @@ static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
return t;
}
-#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1.
*/
-static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 t;
@@ -498,16 +501,19 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
return t;
}
-#define atomic64_dec_if_positive atomic64_dec_if_positive
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
-#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_cmpxchg_relaxed(v, o, n) \
- cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic64_cmpxchg_acquire(v, o, n) \
- cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic64_cmpxchg(v, o, n) \
+ (arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic64_cmpxchg_relaxed(v, o, n) \
+ arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic64_cmpxchg_acquire(v, o, n) \
+ arch_cmpxchg_acquire(&((v)->counter), (o), (n))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#define arch_atomic64_xchg(v, new) \
+ (arch_xchg(&((v)->counter), new))
+#define arch_atomic64_xchg_relaxed(v, new) \
+ arch_xchg_relaxed(&((v)->counter), (new))
/**
* atomic64_fetch_add_unless - add unless the number is a given value
@@ -518,7 +524,7 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
s64 t;
@@ -539,7 +545,7 @@ static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
return t;
}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
/**
* atomic_inc64_not_zero - increment unless the number is zero
@@ -548,7 +554,7 @@ static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
* Atomically increments @v by 1, so long as @v is non-zero.
* Returns non-zero if @v was non-zero, and zero otherwise.
*/
-static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
+static __inline__ int arch_atomic64_inc_not_zero(atomic64_t *v)
{
s64 t1, t2;
@@ -569,7 +575,7 @@ static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
return t1 != 0;
}
-#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
+#define arch_atomic64_inc_not_zero(v) arch_atomic64_inc_not_zero((v))
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index eace8c3f7b0a..c02f42d1031e 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -19,6 +19,7 @@ struct mmu_psize_def {
int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
unsigned int tlbiel; /* tlbiel supported for that page size */
unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
+ unsigned long h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */
union {
unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
unsigned long ap; /* Ap encoding used by PowerISA 3.0 */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 8b33601cdb9d..a46fd37ad552 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
#include <asm/hvcall.h>
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
struct vm_area_struct;
struct mm_struct;
struct mmu_gather;
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..05f246c0e36e 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -185,14 +185,14 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local");
return x;
}
-#define xchg_local(ptr,x) \
+#define arch_xchg_local(ptr,x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_local((ptr), \
(unsigned long)_x_, sizeof(*(ptr))); \
})
-#define xchg_relaxed(ptr, x) \
+#define arch_xchg_relaxed(ptr, x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_relaxed((ptr), \
@@ -467,7 +467,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
return old;
}
-#define cmpxchg(ptr, o, n) \
+#define arch_cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -476,7 +476,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
})
-#define cmpxchg_local(ptr, o, n) \
+#define arch_cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -484,7 +484,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
(unsigned long)_n_, sizeof(*(ptr))); \
})
-#define cmpxchg_relaxed(ptr, o, n) \
+#define arch_cmpxchg_relaxed(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -493,7 +493,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
sizeof(*(ptr))); \
})
-#define cmpxchg_acquire(ptr, o, n) \
+#define arch_cmpxchg_acquire(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -502,29 +502,29 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
sizeof(*(ptr))); \
})
#ifdef CONFIG_PPC64
-#define cmpxchg64(ptr, o, n) \
+#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
+ arch_cmpxchg((ptr), (o), (n)); \
})
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
+ arch_cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg64_relaxed(ptr, o, n) \
+#define arch_cmpxchg64_relaxed(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_relaxed((ptr), (o), (n)); \
+ arch_cmpxchg_relaxed((ptr), (o), (n)); \
})
-#define cmpxchg64_acquire(ptr, o, n) \
+#define arch_cmpxchg64_acquire(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_acquire((ptr), (o), (n)); \
+ arch_cmpxchg_acquire((ptr), (o), (n)); \
})
#else
#include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 98c8bd155bf9..b167186aaee4 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu)
return cpu | (threads_per_core - 1);
}
+/*
+ * tlb_thread_siblings are siblings which share a TLB. This is not
+ * architected, is not something a hypervisor could emulate and a future
+ * CPU may change behaviour even in compat mode, so this should only be
+ * used on PowerNV, and only with care.
+ */
+static inline int cpu_first_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu & ~0x6; /* Big Core */
+ else
+ return cpu_first_thread_sibling(cpu);
+}
+
+static inline int cpu_last_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu | 0x6; /* Big Core */
+ else
+ return cpu_last_thread_sibling(cpu);
+}
+
+static inline int cpu_tlb_thread_sibling_step(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return 2; /* Big Core */
+ else
+ return 1;
+}
+
static inline u32 get_tensr(void)
{
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index c1a8aac01cf9..bb6f78fcf981 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -35,6 +35,19 @@
/* PACA save area size in u64 units (exgen, exmc, etc) */
#define EX_SIZE 10
+/* PACA save area offsets */
+#define EX_R9 0
+#define EX_R10 8
+#define EX_R11 16
+#define EX_R12 24
+#define EX_R13 32
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#define EX_CTR 72
+
/*
* maximum recursive depth of MCE exceptions
*/
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 443050906018..7e4b2cef40c2 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -413,9 +413,9 @@
#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */
#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */
#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */
-/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */
#define H_RPTI_TYPE_PRT 0x0008
-/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */
#define H_RPTI_TYPE_PAT 0x0008
#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
H_RPTI_TYPE_PRT)
@@ -448,6 +448,9 @@
*/
long plpar_hcall_norets(unsigned long opcode, ...);
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
/**
* plpar_hcall: - Make a pseries hypervisor call
* @opcode: The hypervisor call to make.
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 44cde2e129b8..59f704408d65 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
*/
static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
{
- if (user_mode(regs))
- kuep_unlock();
}
static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
@@ -222,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
+ regs->nip < (unsigned long)__end_interrupts) {
+ // Kernel code running below __end_interrupts is
+ // implicitly soft-masked.
+ regs->softe = IRQS_ALL_DISABLED;
+ }
+
/* Don't do any per-CPU operations until interrupt state is fixed */
if (nmi_disables_ftrace(regs)) {
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index b2bd58830430..4982f3711fc3 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -6,7 +6,6 @@
/*
*/
-#include <linux/irqdomain.h>
#include <linux/threads.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
@@ -23,8 +22,8 @@ extern atomic_t ppc_n_lost_interrupts;
/* Total number of virq in the platform */
#define NR_IRQS CONFIG_NR_IRQS
-/* Same thing, used by the generic IRQ code */
-#define NR_IRQS_LEGACY NUM_ISA_INTERRUPTS
+/* Number of irqs reserved for a legacy isa controller */
+#define NR_IRQS_LEGACY 16
extern irq_hw_number_t virq_to_hw(unsigned int virq);
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 2d5c6bec2b4f..93ce3ec25387 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -50,7 +50,7 @@ l_yes:
1098: nop; \
.pushsection __jump_table, "aw"; \
.long 1098b - ., LABEL - .; \
- FTR_ENTRY_LONG KEY; \
+ FTR_ENTRY_LONG KEY - .; \
.popsection
#endif
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a3633560493b..fbbf3cec92e9 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -147,6 +147,7 @@
#define KVM_GUEST_MODE_SKIP 2
#define KVM_GUEST_MODE_GUEST_HV 3
#define KVM_GUEST_MODE_HOST_HV 4
+#define KVM_GUEST_MODE_HV_P9 5 /* ISA >= v3.0 path */
#define KVM_INST_FETCH_FAILED -1
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index e6b53c6e21e3..caaa0f592d8e 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -307,6 +307,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
void kvmhv_release_all_nested(struct kvm *kvm);
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
u64 time_limit, unsigned long lpcr);
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53..eaf3a562bf1e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,10 +153,18 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
+
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an errata.
+ */
+#define HDSISR_CANARY 0x7fff
+
+/*
* We use a lock bit in HPTE dword 0 to synchronize updates and
* accesses to each HPTE, and another bit to indicate non-present
* HPTEs.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1e83359f286b..9f52f282b1aa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -51,6 +51,7 @@
/* PPC-specific vcpu->requests bit members */
#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
+#define KVM_REQ_PENDING_TIMER KVM_ARCH_REQ(2)
#include <linux/mmu_notifier.h>
@@ -80,12 +81,13 @@ struct kvmppc_book3s_shadow_vcpu;
struct kvm_nested_guest;
struct kvm_vm_stat {
- ulong remote_tlb_flush;
- ulong num_2M_pages;
- ulong num_1G_pages;
+ struct kvm_vm_stat_generic generic;
+ u64 num_2M_pages;
+ u64 num_1G_pages;
};
struct kvm_vcpu_stat {
+ struct kvm_vcpu_stat_generic generic;
u64 sum_exits;
u64 mmio_exits;
u64 signal_exits;
@@ -101,14 +103,8 @@ struct kvm_vcpu_stat {
u64 emulated_inst_exits;
u64 dec_exits;
u64 ext_intr_exits;
- u64 halt_poll_success_ns;
- u64 halt_poll_fail_ns;
u64 halt_wait_ns;
- u64 halt_successful_poll;
- u64 halt_attempted_poll;
u64 halt_successful_wait;
- u64 halt_poll_invalid;
- u64 halt_wakeup;
u64 dbell_exits;
u64 gdbell_exits;
u64 ld;
@@ -297,7 +293,6 @@ struct kvm_arch {
u8 fwnmi_enabled;
u8 secure_guest;
u8 svm_enabled;
- bool threads_indep;
bool nested_enable;
bool dawr1_enabled;
pgd_t *pgtable;
@@ -683,7 +678,12 @@ struct kvm_vcpu_arch {
ulong fault_dar;
u32 fault_dsisr;
unsigned long intr_msr;
- ulong fault_gpa; /* guest real address of page fault (POWER9) */
+ /*
+ * POWER9 and later: fault_gpa contains the guest real address of page
+ * fault for a radix guest, or segment descriptor (equivalent to result
+ * from slbmfev of SLB entry that translated the EA) for hash guests.
+ */
+ ulong fault_gpa;
#endif
#ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5bf8ae9bb2cc..2d88944f9f34 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+ { return 0; }
#endif
#ifdef CONFIG_KVM_XIVE
@@ -655,8 +659,6 @@ extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
-extern void kvmppc_xive_init_module(void);
-extern void kvmppc_xive_exit_module(void);
extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
@@ -671,6 +673,8 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{
@@ -680,8 +684,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
-extern void kvmppc_xive_native_init_module(void);
-extern void kvmppc_xive_native_exit_module(void);
extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
@@ -695,8 +697,6 @@ static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority) { return -1; }
static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
-static inline void kvmppc_xive_init_module(void) { }
-static inline void kvmppc_xive_exit_module(void) { }
static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
@@ -711,14 +711,14 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
-static inline void kvmppc_xive_native_init_module(void) { }
-static inline void kvmppc_xive_native_exit_module(void) { }
static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val)
{ return 0; }
@@ -754,7 +754,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long tce_value, unsigned long npages);
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count);
-long kvmppc_h_random(struct kvm_vcpu *vcpu);
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
void kvmhv_commence_exit(int trap);
void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 4bc45d3ed8b0..db186c539d37 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
}
#endif
-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@@ -222,6 +216,18 @@ static inline void mm_context_add_copro(struct mm_struct *mm) { }
static inline void mm_context_remove_copro(struct mm_struct *mm) { }
#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
+#else
+static inline void do_h_rpt_invalidate_prt(unsigned long pid,
+ unsigned long lpid,
+ unsigned long type,
+ unsigned long pg_sizes,
+ unsigned long start,
+ unsigned long end) { }
+#endif
extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 6cda76b57c5d..4c6c6dbd182f 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -18,7 +18,7 @@
* flags field of the struct page
*/
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
extern struct pglist_data *node_data[];
/*
@@ -41,7 +41,7 @@ u64 memory_hotplug_max(void);
#else
#define memory_hotplug_max() memblock_end_of_DRAM()
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_FA_DUMP
#define __HAVE_ARCH_RESERVED_KERNEL_PAGES
#endif
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index 5d1726bb28e7..bcb7b5f917be 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu)
return be32_to_cpu(yield_count);
}
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held: the lock slowpath yields if
+ * it can not acquire the lock, and unlock slow path might prod if a waiter has
+ * yielded). So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
static inline void yield_to_preempted(int cpu, u32 yield_count)
{
- plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+ plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
}
static inline void prod_cpu(int cpu)
{
- plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+ plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
}
static inline void yield_to_any(void)
{
- plpar_hcall_norets(H_CONFER, -1, 0);
+ plpar_hcall_norets_notrace(H_CONFER, -1, 0);
}
#else
static inline bool is_shared_processor(void)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index ece84a430701..83e0f701ebc6 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint)
static inline long cede_processor(void)
{
- return plpar_hcall_norets(H_CEDE);
+ /*
+ * We cannot call tracepoints inside RCU idle regions which
+ * means we must not trace H_CEDE.
+ */
+ return plpar_hcall_norets_notrace(H_CEDE);
}
static inline long extended_cede_processor(unsigned long latency_hint)
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
index 33fa5dd8ee6a..714a35f0d425 100644
--- a/arch/powerpc/include/asm/pte-walk.h
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
pgd_t *pgdir = init_mm.pgd;
return __find_linux_pte(pgdir, ea, NULL, hshift);
}
+
+/*
+ * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a
+ * physical address, without taking locks. This can be used in real-mode.
+ */
+static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
+{
+ pte_t *ptep;
+ phys_addr_t pa;
+ int hugepage_shift;
+
+ /*
+ * init_mm does not free page tables, and does not do THP. It may
+ * have huge pages from huge vmalloc / ioremap etc.
+ */
+ ptep = find_init_mm_pte(addr, &hugepage_shift);
+ if (WARN_ON(!ptep))
+ return 0;
+
+ pa = PFN_PHYS(pte_pfn(*ptep));
+
+ if (!hugepage_shift)
+ hugepage_shift = PAGE_SHIFT;
+
+ pa |= addr & ((1ul << hugepage_shift) - 1);
+
+ return pa;
+}
+
/*
* This is what we should always use. Any other lockless page table lookup needs
* careful audit against THP split.
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 9c9ab2746168..b476a685f066 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -19,6 +19,7 @@
#ifndef _ASM_POWERPC_PTRACE_H
#define _ASM_POWERPC_PTRACE_H
+#include <linux/err.h>
#include <uapi/asm/ptrace.h>
#include <asm/asm-const.h>
@@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs);
long do_syscall_trace_enter(struct pt_regs *regs);
void do_syscall_trace_leave(struct pt_regs *regs);
-#define kernel_stack_pointer(regs) ((regs)->gpr[1])
-static inline int is_syscall_success(struct pt_regs *regs)
-{
- return !(regs->ccr & 0x10000000);
-}
-
-static inline long regs_return_value(struct pt_regs *regs)
-{
- if (is_syscall_success(regs))
- return regs->gpr[3];
- else
- return -regs->gpr[3];
-}
-
-static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
-{
- regs->gpr[3] = rc;
-}
-
#ifdef __powerpc64__
#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
#else
@@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs)
regs->trap |= 0x1;
}
+#define kernel_stack_pointer(regs) ((regs)->gpr[1])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+ if (trap_is_scv(regs))
+ return !IS_ERR_VALUE((unsigned long)regs->gpr[3]);
+ else
+ return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ if (trap_is_scv(regs))
+ return regs->gpr[3];
+
+ if (is_syscall_success(regs))
+ return regs->gpr[3];
+ else
+ return -regs->gpr[3];
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->gpr[3] = rc;
+}
+
#define arch_has_single_step() (1)
#define arch_has_block_step() (true)
#define ARCH_HAS_USER_SINGLE_STEP_REPORT
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 07318bc63e3d..b676c4fb90fd 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -37,7 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
u32 val = 0;
- if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
+ if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
return;
queued_spin_lock_slowpath(lock, val);
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index fd1b518eed17..ba0f88f3a30d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task,
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- /*
- * If the system call failed,
- * regs->gpr[3] contains a positive ERRORCODE.
- */
- return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+ if (trap_is_scv(regs)) {
+ unsigned long error = regs->gpr[3];
+
+ return IS_ERR_VALUE(error) ? error : 0;
+ } else {
+ /*
+ * If the system call failed,
+ * regs->gpr[3] contains a positive ERRORCODE.
+ */
+ return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+ }
}
static inline long syscall_get_return_value(struct task_struct *task,
@@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- /*
- * In the general case it's not obvious that we must deal with CCR
- * here, as the syscall exit path will also do that for us. However
- * there are some places, eg. the signal code, which check ccr to
- * decide if the value in r3 is actually an error.
- */
- if (error) {
- regs->ccr |= 0x10000000L;
- regs->gpr[3] = error;
+ if (trap_is_scv(regs)) {
+ regs->gpr[3] = (long) error ?: val;
} else {
- regs->ccr &= ~0x10000000L;
- regs->gpr[3] = val;
+ /*
+ * In the general case it's not obvious that we must deal with
+ * CCR here, as the syscall exit path will also do that for us.
+ * However there are some places, eg. the signal code, which
+ * check ccr to decide if the value in r3 is actually an error.
+ */
+ if (error) {
+ regs->ccr |= 0x10000000L;
+ regs->gpr[3] = error;
+ } else {
+ regs->ccr &= ~0x10000000L;
+ regs->gpr[3] = val;
+ }
}
}
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8dd3cdb25338..8c2c3dd4ddba 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
extern void secondary_cpu_time_init(void);
extern void __init time_init(void);
+#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
+ return x;
+}
+#endif
+
DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a09e4240c5b1..22c79ab40006 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -157,7 +157,7 @@ do { \
"2: lwz%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
EX_TABLE(2b, %l2) \
- : "=r" (x) \
+ : "=&r" (x) \
: "m" (*addr) \
: \
: label)
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 28af4efb4587..aa267d173ded 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -534,7 +534,6 @@ int main(void)
OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
- OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f24cd53ff26e..3bbdcc86d01b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
*/
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
- pte_t *ptep;
- unsigned long pa;
- int hugepage_shift;
-
- /*
- * We won't find hugepages here(this is iomem). Hence we are not
- * worried about _PAGE_SPLITTING/collapse. Also we will not hit
- * page table free, because of init_mm.
- */
- ptep = find_init_mm_pte(token, &hugepage_shift);
- if (!ptep)
- return token;
-
- pa = pte_pfn(*ptep);
-
- /* On radix we can do hugepage mappings for io, so handle that */
- if (!hugepage_shift)
- hugepage_shift = PAGE_SHIFT;
-
- pa <<= PAGE_SHIFT;
- pa |= token & ((1ul << hugepage_shift) - 1);
- return pa;
+ return ppc_find_vmap_phys(token);
}
/*
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7c3654b0d0f4..f1ae710274bc 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -340,6 +340,12 @@ ret_from_mc_except:
andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
bne masked_interrupt_book3e_##n
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
std r15,PACA_EXGEN+EX_R15(r13)
@@ -535,6 +541,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x300)
b storage_fault_common
@@ -544,6 +554,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
li r15,0
mr r14,r10
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x400)
b storage_fault_common
@@ -557,6 +571,10 @@ __end_interrupts:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DAR(r1)
+ std r15,_DSISR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x600)
b alignment_more /* no room, go out of line */
@@ -565,10 +583,10 @@ __end_interrupts:
NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
PROLOG_ADDITION_1REG)
mfspr r14,SPRN_ESR
- EXCEPTION_COMMON(0x700)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXGEN+EX_R14(r13)
+ EXCEPTION_COMMON(0x700)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
REST_NVGPRS(r1)
b interrupt_return
@@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_CRIT(0xd00)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXCRIT+EX_R14(r13)
ld r15,PACA_EXCRIT+EX_R15(r13)
+ EXCEPTION_COMMON_CRIT(0xd00)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl DebugException
REST_NVGPRS(r1)
b interrupt_return
@@ -796,11 +814,11 @@ kernel_dbg_exc:
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_DBG(0xd08)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
ld r14,PACA_EXDBG+EX_R14(r13)
ld r15,PACA_EXDBG+EX_R15(r13)
+ EXCEPTION_COMMON_DBG(0xd08)
+ addi r3,r1,STACK_FRAME_OVERHEAD
bl DebugException
REST_NVGPRS(r1)
b interrupt_return
@@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0:
* original values stashed away in the PACA
*/
storage_fault_common:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
bl do_page_fault
b interrupt_return
@@ -944,11 +958,7 @@ storage_fault_common:
* continues here.
*/
alignment_more:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
bl alignment_exception
REST_NVGPRS(r1)
b interrupt_return
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index fa8e52a0239e..f7fc6e078d4e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,22 +21,6 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9 0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_DAR 40
-#define EX_DSISR 48
-#define EX_CCR 52
-#define EX_CFAR 56
-#define EX_PPR 64
-#define EX_CTR 72
-.if EX_SIZE != 10
- .error "EX_SIZE is wrong"
-.endif
-
/*
* Following are fixed section helper macros.
*
@@ -133,7 +117,6 @@ name:
#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
-#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */
#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
@@ -190,9 +173,6 @@ do_define_int n
.ifndef IMASK
IMASK=0
.endif
- .ifndef IKVM_SKIP
- IKVM_SKIP=0
- .endif
.ifndef IKVM_REAL
IKVM_REAL=0
.endif
@@ -207,8 +187,6 @@ do_define_int n
.endif
.endm
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* All interrupts which set HSRR registers, as well as SRESET and MCE and
* syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -238,88 +216,28 @@ do_define_int n
/*
* If an interrupt is taken while a guest is running, it is immediately routed
- * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
- * to kvmppc_interrupt_hv, which handles the PR guest case.
+ * to KVM to handle.
*/
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-.macro KVMTEST name
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
- bne \name\()_kvm
-.endm
-
-.macro GEN_KVM name
- .balign IFETCH_ALIGN_BYTES
-\name\()_kvm:
-
- .if IKVM_SKIP
- cmpwi r10,KVM_GUEST_MODE_SKIP
- beq 89f
- .else
-BEGIN_FTR_SECTION
- ld r10,IAREA+EX_CFAR(r13)
- std r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- .endif
-
- ld r10,IAREA+EX_CTR(r13)
- mtctr r10
-BEGIN_FTR_SECTION
- ld r10,IAREA+EX_PPR(r13)
- std r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ld r11,IAREA+EX_R11(r13)
- ld r12,IAREA+EX_R12(r13)
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r9,32
- ld r9,IAREA+EX_R9(r13)
- ld r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
.if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- ori r12,r12,(IVEC + 0x2)
- FTR_SECTION_ELSE
- ori r12,r12,(IVEC)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- .elseif IHSRR
- ori r12,r12,(IVEC+ 0x2)
- .else
- ori r12,r12,(IVEC)
- .endif
- b kvmppc_interrupt
-
- .if IKVM_SKIP
-89: mtocrf 0x80,r9
- ld r10,IAREA+EX_CTR(r13)
- mtctr r10
- ld r9,IAREA+EX_R9(r13)
- ld r10,IAREA+EX_R10(r13)
- ld r11,IAREA+EX_R11(r13)
- ld r12,IAREA+EX_R12(r13)
- .if IHSRR_IF_HVMODE
- BEGIN_FTR_SECTION
- b kvmppc_skip_Hinterrupt
+ li r10,(IVEC + 0x2)
FTR_SECTION_ELSE
- b kvmppc_skip_interrupt
+ li r10,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
.elseif IHSRR
- b kvmppc_skip_Hinterrupt
+ li r10,(IVEC + 0x2)
.else
- b kvmppc_skip_interrupt
+ li r10,(IVEC)
.endif
- .endif
-.endm
-
-#else
-.macro KVMTEST name
-.endm
-.macro GEN_KVM name
-.endm
+ bne \handler
#endif
+.endm
/*
* This is the BOOK3S interrupt entry code macro.
@@ -461,7 +379,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
.endif
ld r10,PACAKMSR(r13) /* get MSR value for kernel */
@@ -484,7 +402,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
DEFINE_FIXED_SYMBOL(\name\()_common_virt)
\name\()_common_virt:
.if IKVM_VIRT
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
1:
.endif
.endif /* IVIRT */
@@ -498,7 +416,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
- KVMTEST \name
+ KVMTEST \name kvm_interrupt
.endif
.endm
@@ -1000,8 +918,6 @@ EXC_COMMON_BEGIN(system_reset_common)
EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
- GEN_KVM system_reset
-
/**
* Interrupt 0x200 - Machine Check Interrupt (MCE).
@@ -1070,7 +986,6 @@ INT_DEFINE_BEGIN(machine_check)
ISET_RI=0
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(machine_check)
@@ -1166,7 +1081,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
/*
* Check if we are coming from guest. If yes, then run the normal
* exception handler which will take the
- * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * machine_check_kvm->kvm_interrupt branch to deliver the MC event
* to guest.
*/
lbz r11,HSTATE_IN_GUEST(r13)
@@ -1236,8 +1151,6 @@ EXC_COMMON_BEGIN(machine_check_common)
bl machine_check_exception
b interrupt_return
- GEN_KVM machine_check
-
#ifdef CONFIG_PPC_P7_NAP
/*
@@ -1342,7 +1255,6 @@ INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access)
@@ -1373,8 +1285,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
REST_NVGPRS(r1)
b interrupt_return
- GEN_KVM data_access
-
/**
* Interrupt 0x380 - Data Segment Interrupt (DSLB).
@@ -1396,7 +1306,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IDAR=1
- IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access_slb)
@@ -1425,8 +1334,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
bl do_bad_slb_fault
b interrupt_return
- GEN_KVM data_access_slb
-
/**
* Interrupt 0x400 - Instruction Storage Interrupt (ISI).
@@ -1463,8 +1370,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b interrupt_return
- GEN_KVM instruction_access
-
/**
* Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
@@ -1509,8 +1414,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
bl do_bad_slb_fault
b interrupt_return
- GEN_KVM instruction_access_slb
-
/**
* Interrupt 0x500 - External Interrupt.
@@ -1555,8 +1458,6 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
bl do_IRQ
b interrupt_return
- GEN_KVM hardware_interrupt
-
/**
* Interrupt 0x600 - Alignment Interrupt
@@ -1584,8 +1485,6 @@ EXC_COMMON_BEGIN(alignment_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM alignment
-
/**
* Interrupt 0x700 - Program Interrupt (program check).
@@ -1693,8 +1592,6 @@ EXC_COMMON_BEGIN(program_check_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM program_check
-
/*
* Interrupt 0x800 - Floating-Point Unavailable Interrupt.
@@ -1744,8 +1641,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
b interrupt_return
#endif
- GEN_KVM fp_unavailable
-
/**
* Interrupt 0x900 - Decrementer Interrupt.
@@ -1784,8 +1679,6 @@ EXC_COMMON_BEGIN(decrementer_common)
bl timer_interrupt
b interrupt_return
- GEN_KVM decrementer
-
/**
* Interrupt 0x980 - Hypervisor Decrementer Interrupt.
@@ -1831,8 +1724,6 @@ EXC_COMMON_BEGIN(hdecrementer_common)
ld r13,PACA_EXGEN+EX_R13(r13)
HRFI_TO_KERNEL
- GEN_KVM hdecrementer
-
/**
* Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
@@ -1872,8 +1763,6 @@ EXC_COMMON_BEGIN(doorbell_super_common)
#endif
b interrupt_return
- GEN_KVM doorbell_super
-
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
@@ -1923,7 +1812,7 @@ INT_DEFINE_END(system_call)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST system_call /* uses r10, branch to system_call_kvm */
+ KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
mfctr r9
#else
mr r9,r13
@@ -1979,14 +1868,16 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(system_call_kvm)
- /*
- * This is a hcall, so register convention is as above, with these
- * differences:
- * r13 = PACA
- * ctr = orig r13
- * orig r10 saved in PACA
- */
+TRAMP_REAL_BEGIN(kvm_hcall)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfcr r9
+ mfctr r10
+ std r10,PACA_EXGEN+EX_R13(r13)
+ li r10,0
+ std r10,PACA_EXGEN+EX_CFAR(r13)
+ std r10,PACA_EXGEN+EX_CTR(r13)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1994,31 +1885,24 @@ TRAMP_REAL_BEGIN(system_call_kvm)
*/
BEGIN_FTR_SECTION
mfspr r10,SPRN_PPR
- std r10,HSTATE_PPR(r13)
+ std r10,PACA_EXGEN+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
HMT_MEDIUM
- mfctr r10
- SET_SCRATCH0(r10)
- mfcr r10
- std r12,HSTATE_SCRATCH0(r13)
- sldi r12,r10,32
- ori r12,r12,0xc00
+
#ifdef CONFIG_RELOCATABLE
/*
- * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+ * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
* outside the head section.
*/
- __LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall)
mtctr r10
- ld r10,PACA_EXGEN+EX_R10(r13)
bctr
#else
- ld r10,PACA_EXGEN+EX_R10(r13)
- b kvmppc_interrupt
+ b kvmppc_hcall
#endif
#endif
-
/**
* Interrupt 0xd00 - Trace Interrupt.
* This is a synchronous interrupt in response to instruction step or
@@ -2043,8 +1927,6 @@ EXC_COMMON_BEGIN(single_step_common)
bl single_step_exception
b interrupt_return
- GEN_KVM single_step
-
/**
* Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
@@ -2063,7 +1945,6 @@ INT_DEFINE_BEGIN(h_data_storage)
IHSRR=1
IDAR=1
IDSISR=1
- IKVM_SKIP=1
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(h_data_storage)
@@ -2084,8 +1965,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b interrupt_return
- GEN_KVM h_data_storage
-
/**
* Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
@@ -2111,8 +1990,6 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
bl unknown_exception
b interrupt_return
- GEN_KVM h_instr_storage
-
/**
* Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
@@ -2137,8 +2014,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM emulation_assist
-
/**
* Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
@@ -2210,16 +2085,12 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXCEPTION_RESTORE_REGS hsrr=1
GEN_INT_ENTRY hmi_exception, virt=0
- GEN_KVM hmi_exception_early
-
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b interrupt_return
- GEN_KVM hmi_exception
-
/**
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
@@ -2250,8 +2121,6 @@ EXC_COMMON_BEGIN(h_doorbell_common)
#endif
b interrupt_return
- GEN_KVM h_doorbell
-
/**
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
@@ -2278,8 +2147,6 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
bl do_IRQ
b interrupt_return
- GEN_KVM h_virt_irq
-
EXC_REAL_NONE(0xec0, 0x20)
EXC_VIRT_NONE(0x4ec0, 0x20)
@@ -2323,8 +2190,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
bl performance_monitor_exception
b interrupt_return
- GEN_KVM performance_monitor
-
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.
@@ -2374,8 +2239,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
bl altivec_unavailable_exception
b interrupt_return
- GEN_KVM altivec_unavailable
-
/**
* Interrupt 0xf40 - VSX Unavailable Interrupt.
@@ -2424,8 +2287,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl vsx_unavailable_exception
b interrupt_return
- GEN_KVM vsx_unavailable
-
/**
* Interrupt 0xf60 - Facility Unavailable Interrupt.
@@ -2454,8 +2315,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
- GEN_KVM facility_unavailable
-
/**
* Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
@@ -2484,8 +2343,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
b interrupt_return
- GEN_KVM h_facility_unavailable
-
EXC_REAL_NONE(0xfa0, 0x20)
EXC_VIRT_NONE(0x4fa0, 0x20)
@@ -2515,8 +2372,6 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
bl cbe_system_error_exception
b interrupt_return
- GEN_KVM cbe_system_error
-
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
@@ -2548,8 +2403,6 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
bl instruction_breakpoint_exception
b interrupt_return
- GEN_KVM instruction_breakpoint
-
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
@@ -2670,8 +2523,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
bl unknown_exception
b interrupt_return
- GEN_KVM denorm_exception
-
#ifdef CONFIG_CBE_RAS
INT_DEFINE_BEGIN(cbe_maintenance)
@@ -2689,8 +2540,6 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
bl cbe_maintenance_exception
b interrupt_return
- GEN_KVM cbe_maintenance
-
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
@@ -2721,8 +2570,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
#endif
b interrupt_return
- GEN_KVM altivec_assist
-
#ifdef CONFIG_CBE_RAS
INT_DEFINE_BEGIN(cbe_thermal)
@@ -2740,8 +2587,6 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
bl cbe_thermal_exception
b interrupt_return
- GEN_KVM cbe_thermal
-
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
@@ -2994,6 +2839,15 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
USE_TEXT_SECTION()
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvm_interrupt:
+ /*
+ * The conditional branch in KVMTEST can't reach all the way,
+ * make a stub.
+ */
+ b kvmppc_interrupt
+#endif
+
_GLOBAL(do_uaccess_flush)
UACCESS_FLUSH_FIXUP_SECTION
nop
@@ -3009,32 +2863,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
- RFI_TO_KERNEL
- b .
-
-kvmppc_skip_Hinterrupt:
- /*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
- */
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- HRFI_TO_KERNEL
- b .
-#endif
-
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
* with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e4559f8914eb..e0938ba298f2 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
syscall_fn f;
kuep_lock();
-#ifdef CONFIG_PPC32
- kuap_save_and_lock(regs);
-#endif
regs->orig_gpr3 = r3;
@@ -427,6 +424,7 @@ again:
/* Restore user access locks last */
kuap_user_restore(regs);
+ kuep_unlock();
return ret;
}
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 51bbaae94ccc..c877f074d174 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
#ifdef CONFIG_PPC_INDIRECT_MMIO
struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
{
- unsigned hugepage_shift;
struct iowa_bus *bus;
int token;
@@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
bus = &iowa_busses[token - 1];
else {
unsigned long vaddr, paddr;
- pte_t *ptep;
vaddr = (unsigned long)PCI_FIX_ADDR(addr);
if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
return NULL;
- /*
- * We won't find huge pages here (iomem). Also can't hit
- * a page table free due to init_mm
- */
- ptep = find_init_mm_pte(vaddr, &hugepage_shift);
- if (ptep == NULL)
- paddr = 0;
- else {
- WARN_ON(hugepage_shift);
- paddr = pte_pfn(*ptep) << PAGE_SHIFT;
- }
+
+ paddr = ppc_find_vmap_phys(vaddr);
+
bus = iowa_pci_find(vaddr, paddr);
if (bus == NULL)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 57d6b85e9b96..2af89a5e379f 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
unsigned int order;
unsigned int nio_pages, io_order;
struct page *page;
- size_t size_io = size;
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
- nio_pages = size_io >> tbl->it_page_shift;
- io_order = get_iommu_order(size_io, tbl);
+ nio_pages = size >> tbl->it_page_shift;
+ io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
if (mapping == DMA_MAPPING_ERROR) {
@@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
void *vaddr, dma_addr_t dma_handle)
{
if (tbl) {
- size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
- unsigned int nio_pages = size_io >> tbl->it_page_shift;
+ unsigned int nio_pages;
+ size = PAGE_ALIGN(size);
+ nio_pages = size >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 01ab2163659e..c64a5feaebbe 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p)
int ret = 0;
struct kprobe *prev;
struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
- struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1));
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p)
} else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
ret = -EINVAL;
- } else if (ppc_inst_prefixed(prefix)) {
+ } else if ((unsigned long)p->addr & ~PAGE_MASK &&
+ ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) {
printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
@@ -502,23 +502,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 8b2c1a8553a0..cfc03e016ff2 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console)
static int __init ioremap_legacy_serial_console(void)
{
- struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
- struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+ struct plat_serial8250_port *port;
+ struct legacy_serial_info *info;
void __iomem *vaddr;
if (legacy_serial_console < 0)
return 0;
+ info = &legacy_serial_infos[legacy_serial_console];
+ port = &legacy_serial_ports[legacy_serial_console];
+
if (!info->early_addr)
return 0;
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9a3c2a84a2ac..15e7b4900689 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -18,6 +18,7 @@
#include <linux/extable.h>
#include <linux/ftrace.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <asm/interrupt.h>
#include <asm/machdep.h>
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 89e34aa273e2..8935c5696bce 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2084,7 +2084,7 @@ static unsigned long __get_wchan(struct task_struct *p)
unsigned long ip, sp;
int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
+ if (!p || p == current || task_is_running(p))
return 0;
sp = p->thread.ksp;
@@ -2094,7 +2094,7 @@ static unsigned long __get_wchan(struct task_struct *p)
do {
sp = *(unsigned long *)sp;
if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
- p->state == TASK_RUNNING)
+ task_is_running(p))
return 0;
if (count > 0) {
ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 0fdfcdd9d880..c17d1c9362b5 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -432,16 +432,19 @@ device_initcall(stf_barrier_debugfs_init);
static void update_branch_cache_flush(void)
{
- u32 *site;
+ u32 *site, __maybe_unused *site2;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
site = &patch__call_kvm_flush_link_stack;
+ site2 = &patch__call_kvm_flush_link_stack_p9;
// This controls the branch from guest_exit_cont to kvm_flush_link_stack
if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(site2, ppc_inst(PPC_INST_NOP));
} else {
// Could use HW flush, but that could also flush count cache
patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
}
#endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b779d25761cf..a35fbf4d0bce 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
- early_ioremap_setup();
-
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ early_ioremap_setup();
+
/*
* After firmware and early platform setup code has set things up,
* we note the SPR values for configurable control/performance
@@ -788,7 +788,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
size_t align)
{
const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
int node = early_cpu_to_node(cpu);
void *ptr;
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index f4aafa337c2e..1f07317964e4 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#else
-#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
-#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
static inline unsigned long
copy_fpr_to_user(void __user *to, struct task_struct *task)
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index dca66481d0c2..f9e1f5428b9e 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
user_write_access_end();
+ /* Save the siginfo outside of the unsafe block. */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ goto badframe;
+
/* Make sure signal handler doesn't get spurious FP exceptions */
tsk->thread.fp_state.fpscr = 0;
@@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
regs->nip = (unsigned long) &frame->tramp[0];
}
-
- /* Save the siginfo outside of the unsafe block. */
- if (copy_siginfo_to_user(&frame->info, &ksig->info))
- goto badframe;
-
/* Allocate a dummy caller frame for the signal handler. */
newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 2e05c783440a..7ddc2d32c39e 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1047,7 +1047,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
/*
* numa_node_id() works after this.
*/
@@ -1547,7 +1547,6 @@ void start_secondary(void *unused)
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
rcu_cpu_starting(cpu);
- preempt_disable();
cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 2e68fbb57cc6..aef2a290e71a 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
442 common mount_setattr sys_mount_setattr
-443 common quotactl_path sys_quotactl_path
+443 common quotactl_fd sys_quotactl_fd
444 common landlock_create_ruleset sys_landlock_create_ruleset
445 common landlock_add_rule sys_landlock_add_rule
446 common landlock_restrict_self sys_landlock_restrict_self
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b67d93a609a2..da995c5fb97d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -508,16 +508,6 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
- unsigned long x;
-
- asm volatile("lbz %0,%1(13)"
- : "=r" (x)
- : "i" (offsetof(struct paca_struct, irq_work_pending)));
- return x;
-}
-
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
index 56da5eb2b923..48525e8b5730 100644
--- a/arch/powerpc/kexec/core.c
+++ b/arch/powerpc/kexec/core.c
@@ -68,11 +68,11 @@ void machine_kexec_cleanup(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
VMCOREINFO_SYMBOL(contig_page_data);
#endif
#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2bfeaa13befb..583c14ef596e 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,7 @@
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
@@ -57,6 +57,7 @@ kvm-pr-y := \
book3s_32_mmu.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_64_entry.o \
tm.o
ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -86,6 +87,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_hmi.o \
+ book3s_hv_p9_entry.o \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 2b691f4d1f26..79833f78d1da 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -38,37 +38,66 @@
/* #define EXIT_DEBUG */
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- VCPU_STAT("exits", sum_exits),
- VCPU_STAT("mmio", mmio_exits),
- VCPU_STAT("sig", signal_exits),
- VCPU_STAT("sysc", syscall_exits),
- VCPU_STAT("inst_emu", emulated_inst_exits),
- VCPU_STAT("dec", dec_exits),
- VCPU_STAT("ext_intr", ext_intr_exits),
- VCPU_STAT("queue_intr", queue_intr),
- VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
- VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
- VCPU_STAT("halt_wait_ns", halt_wait_ns),
- VCPU_STAT("halt_successful_poll", halt_successful_poll),
- VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
- VCPU_STAT("halt_successful_wait", halt_successful_wait),
- VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
- VCPU_STAT("halt_wakeup", halt_wakeup),
- VCPU_STAT("pf_storage", pf_storage),
- VCPU_STAT("sp_storage", sp_storage),
- VCPU_STAT("pf_instruc", pf_instruc),
- VCPU_STAT("sp_instruc", sp_instruc),
- VCPU_STAT("ld", ld),
- VCPU_STAT("ld_slow", ld_slow),
- VCPU_STAT("st", st),
- VCPU_STAT("st_slow", st_slow),
- VCPU_STAT("pthru_all", pthru_all),
- VCPU_STAT("pthru_host", pthru_host),
- VCPU_STAT("pthru_bad_aff", pthru_bad_aff),
- VM_STAT("largepages_2M", num_2M_pages, .mode = 0444),
- VM_STAT("largepages_1G", num_1G_pages, .mode = 0444),
- { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_ICOUNTER(VM, num_2M_pages),
+ STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+ sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, sum_exits),
+ STATS_DESC_COUNTER(VCPU, mmio_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, light_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, syscall_exits),
+ STATS_DESC_COUNTER(VCPU, isi_exits),
+ STATS_DESC_COUNTER(VCPU, dsi_exits),
+ STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+ STATS_DESC_COUNTER(VCPU, dec_exits),
+ STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+ STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
+ STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+ STATS_DESC_COUNTER(VCPU, dbell_exits),
+ STATS_DESC_COUNTER(VCPU, gdbell_exits),
+ STATS_DESC_COUNTER(VCPU, ld),
+ STATS_DESC_COUNTER(VCPU, st),
+ STATS_DESC_COUNTER(VCPU, pf_storage),
+ STATS_DESC_COUNTER(VCPU, pf_instruc),
+ STATS_DESC_COUNTER(VCPU, sp_storage),
+ STATS_DESC_COUNTER(VCPU, sp_instruc),
+ STATS_DESC_COUNTER(VCPU, queue_intr),
+ STATS_DESC_COUNTER(VCPU, ld_slow),
+ STATS_DESC_COUNTER(VCPU, st_slow),
+ STATS_DESC_COUNTER(VCPU, pthru_all),
+ STATS_DESC_COUNTER(VCPU, pthru_host),
+ STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+ sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
};
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
@@ -171,6 +200,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
/* might as well deliver this straight away */
@@ -1044,13 +1079,10 @@ static int kvmppc_book3s_init(void)
#ifdef CONFIG_KVM_XICS
#ifdef CONFIG_KVM_XIVE
if (xics_on_xive()) {
- kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
- if (kvmppc_xive_native_supported()) {
- kvmppc_xive_native_init_module();
+ if (kvmppc_xive_native_supported())
kvm_register_device_ops(&kvm_xive_native_ops,
KVM_DEV_TYPE_XIVE);
- }
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,12 +1092,6 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
-#ifdef CONFIG_KVM_XICS
- if (xics_on_xive()) {
- kvmppc_xive_exit_module();
- kvmppc_xive_native_exit_module();
- }
-#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
#endif
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
new file mode 100644
index 000000000000..983b8c18bc31
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/code-patching-asm.h>
+#include <asm/exception-64s.h>
+#include <asm/export.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/ultravisor-api.h>
+
+/*
+ * These are branched to from interrupt handlers in exception-64s.S which set
+ * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
+ */
+
+/*
+ * This is a hcall, so register convention is as
+ * Documentation/powerpc/papr_hcalls.rst.
+ *
+ * This may also be a syscall from PR-KVM userspace that is to be
+ * reflected to the PR guest kernel, so registers may be set up for
+ * a system call rather than hcall. We don't currently clobber
+ * anything here, but the 0xc00 handler has already clobbered CTR
+ * and CR0, so PR-KVM can not support a guest kernel that preserves
+ * those registers across its system calls.
+ *
+ * The state of registers is as kvmppc_interrupt, except CFAR is not
+ * saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
+ */
+.global kvmppc_hcall
+.balign IFETCH_ALIGN_BYTES
+kvmppc_hcall:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_hcall
+#endif
+ ld r10,PACA_EXGEN+EX_R13(r13)
+ SET_SCRATCH0(r10)
+ li r10,0xc00
+ /* Now we look like kvmppc_interrupt */
+ li r11,PACA_EXGEN
+ b .Lgot_save_area
+
+/*
+ * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
+ * call convention:
+ *
+ * guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
+ * guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
+ * guest R13 also saved in SCRATCH0
+ * R13 = PACA
+ * R11 = (H)SRR0
+ * R12 = (H)SRR1
+ * R9 = guest CR
+ * PPR is set to medium
+ *
+ * With the addition for KVM:
+ * R10 = trap vector
+ */
+.global kvmppc_interrupt
+.balign IFETCH_ALIGN_BYTES
+kvmppc_interrupt:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ std r10,HSTATE_SCRATCH0(r13)
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_interrupt
+ ld r10,HSTATE_SCRATCH0(r13)
+#endif
+ li r11,PACA_EXGEN
+ cmpdi r10,0x200
+ bgt+ .Lgot_save_area
+ li r11,PACA_EXMC
+ beq .Lgot_save_area
+ li r11,PACA_EXNMI
+.Lgot_save_area:
+ add r11,r11,r13
+BEGIN_FTR_SECTION
+ ld r12,EX_CFAR(r11)
+ std r12,HSTATE_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r12,EX_CTR(r11)
+ mtctr r12
+BEGIN_FTR_SECTION
+ ld r12,EX_PPR(r11)
+ std r12,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r12,EX_R12(r11)
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r9,32
+ or r12,r12,r10
+ ld r9,EX_R9(r11)
+ ld r10,EX_R10(r11)
+ ld r11,EX_R11(r11)
+
+ /*
+ * Hcalls and other interrupts come here after normalising register
+ * contents and save locations:
+ *
+ * R12 = (guest CR << 32) | interrupt vector
+ * R13 = PACA
+ * guest R12 saved in shadow HSTATE_SCRATCH0
+ * guest R13 saved in SPRN_SCRATCH0
+ */
+ std r9,HSTATE_SCRATCH2(r13)
+ lbz r9,HSTATE_IN_GUEST(r13)
+ cmpwi r9,KVM_GUEST_MODE_SKIP
+ beq- .Lmaybe_skip
+.Lno_skip:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ cmpwi r9,KVM_GUEST_MODE_GUEST
+ beq kvmppc_interrupt_pr
+#endif
+ b kvmppc_interrupt_hv
+#else
+ b kvmppc_interrupt_pr
+#endif
+
+/*
+ * "Skip" interrupts are part of a trick KVM uses a with hash guests to load
+ * the faulting instruction in guest memory from the the hypervisor without
+ * walking page tables.
+ *
+ * When the guest takes a fault that requires the hypervisor to load the
+ * instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
+ * and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
+ * MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
+ * but loads and stores will access the guest context. This is used to load
+ * the faulting instruction using the faulting guest effective address.
+ *
+ * However the guest context may not be able to translate, or it may cause a
+ * machine check or other issue, which results in a fault in the host
+ * (even with KVM-HV).
+ *
+ * These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
+ * are (or are likely) caused by that load, the instruction is skipped by
+ * just returning with the PC advanced +4, where it is noticed the load did
+ * not execute and it goes to the slow path which walks the page tables to
+ * read guest memory.
+ */
+.Lmaybe_skip:
+ cmpwi r12,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_STORAGE
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_SEGMENT
+ beq 1f
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* HSRR interrupts get 2 added to interrupt number */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
+ beq 2f
+#endif
+ b .Lno_skip
+1: mfspr r9,SPRN_SRR0
+ addi r9,r9,4
+ mtspr SPRN_SRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ RFI_TO_KERNEL
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+2: mfspr r9,SPRN_HSRR0
+ addi r9,r9,4
+ mtspr SPRN_HSRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ HRFI_TO_KERNEL
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+/* Stack frame offsets for kvmppc_p9_enter_guest */
+#define SFS (144 + STACK_FRAME_MIN_SIZE)
+#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */
+
+/*
+ * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
+ *
+ * Enter the guest on a ISAv3.0 or later system.
+ */
+.balign IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+ stdu r1,-SFS(r1)
+
+ std r1,HSTATE_HOST_R1(r13)
+
+ mfcr r4
+ stw r4,SFS+8(r1)
+
+ reg = 14
+ .rept 18
+ std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_LR(r3)
+ mtlr r4
+ ld r4,VCPU_CTR(r3)
+ mtctr r4
+ ld r4,VCPU_XER(r3)
+ mtspr SPRN_XER,r4
+
+ ld r1,VCPU_CR(r3)
+
+BEGIN_FTR_SECTION
+ ld r4,VCPU_CFAR(r3)
+ mtspr SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r4,VCPU_PPR(r3)
+ mtspr SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ reg = 4
+ .rept 28
+ ld reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_KVM(r3)
+ lbz r4,KVM_SECURE_GUEST(r4)
+ cmpdi r4,0
+ ld r4,VCPU_GPR(R4)(r3)
+ bne .Lret_to_ultra
+
+ mtcr r1
+
+ ld r0,VCPU_GPR(R0)(r3)
+ ld r1,VCPU_GPR(R1)(r3)
+ ld r2,VCPU_GPR(R2)(r3)
+ ld r3,VCPU_GPR(R3)(r3)
+
+ HRFI_TO_GUEST
+ b .
+
+ /*
+ * Use UV_RETURN ultracall to return control back to the Ultravisor
+ * after processing an hypercall or interrupt that was forwarded
+ * (a.k.a. reflected) to the Hypervisor.
+ *
+ * All registers have already been reloaded except the ucall requires:
+ * R0 = hcall result
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+ * R3 = UV_RETURN
+ */
+.Lret_to_ultra:
+ mtcr r1
+ ld r1,VCPU_GPR(R1)(r3)
+
+ ld r0,VCPU_GPR(R3)(r3)
+ mfspr r2,SPRN_SRR1
+ LOAD_REG_IMMEDIATE(r3, UV_RETURN)
+ sc 2
+
+/*
+ * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
+ * above if the interrupt was taken for a guest that was entered via
+ * kvmppc_p9_enter_guest().
+ *
+ * The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
+ * and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
+ * establishes the host stack and registers to return from the
+ * kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
+ * (SPRs and FP, VEC, etc).
+ */
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+ li r10,0xc00
+ std r10,HSTATE_SCRATCH0(r13)
+
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+ /*
+ * If set to KVM_GUEST_MODE_HV_P9 but we're still in the
+ * hypervisor, that means we can't return from the entry stack.
+ */
+ rldicl. r10,r12,64-MSR_HV_LG,63
+ bne- kvmppc_p9_bad_interrupt
+
+ std r1,HSTATE_SCRATCH1(r13)
+ std r3,HSTATE_SCRATCH2(r13)
+ ld r1,HSTATE_HOST_R1(r13)
+ ld r3,HSTATE_KVM_VCPU(r13)
+
+ std r9,VCPU_CR(r3)
+
+1:
+ std r11,VCPU_PC(r3)
+ std r12,VCPU_MSR(r3)
+
+ reg = 14
+ .rept 18
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ /* r1, r3, r9-r13 are saved to vcpu by C code */
+ std r0,VCPU_GPR(R0)(r3)
+ std r2,VCPU_GPR(R2)(r3)
+ reg = 4
+ .rept 5
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r2,PACATOC(r13)
+
+ mflr r4
+ std r4,VCPU_LR(r3)
+ mfspr r4,SPRN_XER
+ std r4,VCPU_XER(r3)
+
+ reg = 14
+ .rept 18
+ ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ lwz r4,SFS+8(r1)
+ mtcr r4
+
+ /*
+ * Flush the link stack here, before executing the first blr on the
+ * way out of the guest.
+ *
+ * The link stack won't match coming out of the guest anyway so the
+ * only cost is the flush itself. The call clobbers r0.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack_p9
+
+ addi r1,r1,SFS
+ ld r0,PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+/*
+ * Took an interrupt somewhere right before HRFID to guest, so registers are
+ * in a bad way. Return things hopefully enough to run host virtual code and
+ * run the Linux interrupt handler (SRESET or MCE) to print something useful.
+ *
+ * We could be really clever and save all host registers in known locations
+ * before setting HSTATE_IN_GUEST, then restoring them all here, and setting
+ * return address to a fixup that sets them up again. But that's a lot of
+ * effort for a small bit of code. Lots of other things to do first.
+ */
+kvmppc_p9_bad_interrupt:
+BEGIN_MMU_FTR_SECTION
+ /*
+ * Hash host doesn't try to recover MMU (requires host SLB reload)
+ */
+ b .
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+ /*
+ * Clean up guest registers to give host a chance to run.
+ */
+ li r10,0
+ mtspr SPRN_AMR,r10
+ mtspr SPRN_IAMR,r10
+ mtspr SPRN_CIABR,r10
+ mtspr SPRN_DAWRX0,r10
+BEGIN_FTR_SECTION
+ mtspr SPRN_DAWRX1,r10
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
+ mtspr SPRN_PID,r10
+
+ /*
+ * Switch to host MMU mode
+ */
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ lwz r10, KVM_HOST_LPID(r10)
+ mtspr SPRN_LPID,r10
+
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ ld r10, KVM_HOST_LPCR(r10)
+ mtspr SPRN_LPCR,r10
+
+ /*
+ * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
+ * MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
+ */
+ li r10,KVM_GUEST_MODE_NONE
+ stb r10,HSTATE_IN_GUEST(r13)
+ li r10,MSR_RI
+ andc r12,r12,r10
+
+ /*
+ * Go back to interrupt handler. MCE and SRESET have their specific
+ * PACA save area so they should be used directly. They set up their
+ * own stack. The other handlers all use EXGEN. They will use the
+ * guest r1 if it looks like a kernel stack, so just load the
+ * emergency stack and go to program check for all other interrupts.
+ */
+ ld r10,HSTATE_SCRATCH0(r13)
+ cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_common
+
+ cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
+ beq system_reset_common
+
+ b .
+#endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d9193cd73be..c63e263312a4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
kvm_unmap_radix(kvm, range->slot, gfn);
} else {
for (gfn = range->start; gfn < range->end; gfn++)
- kvm_unmap_rmapp(kvm, range->slot, range->start);
+ kvm_unmap_rmapp(kvm, range->slot, gfn);
}
return false;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index d909c069363e..b5905ae4377c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
#include <asm/pte-walk.h>
#include <asm/ultravisor.h>
#include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
/*
* Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
}
psi = shift_to_mmu_psize(pshift);
- rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
- lpid, rb);
+
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+ rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+ lpid, rb);
+ } else {
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_TLB,
+ psize_to_rpti_pgsize(psi),
+ addr, addr + psize);
+ }
+
if (rc)
pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
}
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
return;
}
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
- lpid, TLBIEL_INVAL_SET_LPID);
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ else
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+ 0, -1UL);
if (rc)
pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
}
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 083a4e037718..dc6591548f0c 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -391,10 +391,6 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -489,10 +485,6 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
/*
* used to check for invalidations in progress
*/
@@ -602,10 +594,6 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
long i, ret;
struct kvmppc_spapr_tce_iommu_table *stit;
- /* For radix, we might be in virtual mode, so punt */
- if (kvm_is_radix(vcpu->kvm))
- return H_TOO_HARD;
-
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 28a80d240b76..260e860d53a2 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -76,6 +76,7 @@
#include <asm/kvm_book3s_uvmem.h>
#include <asm/ultravisor.h>
#include <asm/dtl.h>
+#include <asm/plpar_wrappers.h>
#include "book3s.h"
@@ -103,13 +104,9 @@ static int target_smt_mode;
module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
-static bool indep_threads_mode = true;
-module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
-
static bool one_vm_per_core;
module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
#ifdef CONFIG_KVM_XICS
static const struct kernel_param_ops module_param_ops = {
@@ -134,9 +131,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
return kvm->arch.nested_enable && kvm_is_radix(kvm);
}
-/* If set, the threads on each CPU core have to be in the same MMU mode */
-static bool no_mixing_hpt_and_radix __read_mostly;
-
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
@@ -236,7 +230,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
waitp = kvm_arch_vcpu_get_wait(vcpu);
if (rcuwait_wake_up(waitp))
- ++vcpu->stat.halt_wakeup;
+ ++vcpu->stat.generic.halt_wakeup;
cpu = READ_ONCE(vcpu->arch.thread_cpu);
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -807,7 +801,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
* KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
* Keep this in synch with kvmppc_filter_guest_lpcr_hv.
*/
- if (mflags != 0 && mflags != 3)
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
default:
@@ -899,6 +894,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* H_SUCCESS if the source vcore wasn't idle (e.g. if it may
* have useful work to do and should not confer) so we don't
* recheck that here.
+ *
+ * In the case of the P9 single vcpu per vcore case, the real
+ * mode handler is not called but no other threads are in the
+ * source vcore.
*/
spin_lock(&vcore->lock);
@@ -924,8 +923,71 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
return yield_count;
}
+/*
+ * H_RPT_INVALIDATE hcall handler for nested guests.
+ *
+ * Handles only nested process-scoped invalidation requests in L0.
+ */
+static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu)
+{
+ unsigned long type = kvmppc_get_gpr(vcpu, 6);
+ unsigned long pid, pg_sizes, start, end;
+
+ /*
+ * The partition-scoped invalidations aren't handled here in L0.
+ */
+ if (type & H_RPTI_TYPE_NESTED)
+ return RESUME_HOST;
+
+ pid = kvmppc_get_gpr(vcpu, 4);
+ pg_sizes = kvmppc_get_gpr(vcpu, 7);
+ start = kvmppc_get_gpr(vcpu, 8);
+ end = kvmppc_get_gpr(vcpu, 9);
+
+ do_h_rpt_invalidate_prt(pid, vcpu->arch.nested->shadow_lpid,
+ type, pg_sizes, start, end);
+
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ return RESUME_GUEST;
+}
+
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+ unsigned long id, unsigned long target,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ if (!kvm_is_radix(vcpu->kvm))
+ return H_UNSUPPORTED;
+
+ if (end < start)
+ return H_P5;
+
+ /*
+ * Partition-scoped invalidation for nested guests.
+ */
+ if (type & H_RPTI_TYPE_NESTED) {
+ if (!nesting_enabled(vcpu->kvm))
+ return H_FUNCTION;
+
+ /* Support only cores as target */
+ if (target != H_RPTI_TARGET_CMMU)
+ return H_P2;
+
+ return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes,
+ start, end);
+ }
+
+ /*
+ * Process-scoped invalidation for L1 guests.
+ */
+ do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid,
+ type, pg_sizes, start, end);
+ return H_SUCCESS;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
int yield_count;
@@ -937,11 +999,57 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
+ case H_REMOVE:
+ ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_ENTER:
+ ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_READ:
+ ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_MOD:
+ ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_REF:
+ ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PROTECT:
+ ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_BULK_REMOVE:
+ ret = kvmppc_h_bulk_remove(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+
case H_CEDE:
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -955,7 +1063,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
break;
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -971,12 +1079,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
- if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ if (list_empty(&kvm->arch.rtas_tokens))
return RESUME_HOST;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
+ idx = srcu_read_lock(&kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ srcu_read_unlock(&kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
@@ -1060,15 +1168,23 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
ret = H_HARDWARE;
break;
+ case H_RPT_INVALIDATE:
+ ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7),
+ kvmppc_get_gpr(vcpu, 8),
+ kvmppc_get_gpr(vcpu, 9));
+ break;
case H_SET_PARTITION_TABLE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_set_partition_table(vcpu);
break;
case H_ENTER_NESTED:
ret = H_FUNCTION;
- if (!nesting_enabled(vcpu->kvm))
+ if (!nesting_enabled(kvm))
break;
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
@@ -1083,12 +1199,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
case H_TLB_INVALIDATE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
case H_COPY_TOFROM_GUEST:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
case H_PAGE_INIT:
@@ -1099,7 +1215,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_IN:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ ret = kvmppc_h_svm_page_in(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1107,7 +1223,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_OUT:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ ret = kvmppc_h_svm_page_out(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1115,12 +1231,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_INIT_START:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ ret = kvmppc_h_svm_init_start(kvm);
break;
case H_SVM_INIT_DONE:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ ret = kvmppc_h_svm_init_done(kvm);
break;
case H_SVM_INIT_ABORT:
/*
@@ -1130,24 +1246,26 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
* Instead the kvm->arch.secure_guest flag is checked inside
* kvmppc_h_svm_init_abort().
*/
- ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ ret = kvmppc_h_svm_init_abort(kvm);
break;
default:
return RESUME_HOST;
}
+ WARN_ON_ONCE(ret == H_TOO_HARD);
kvmppc_set_gpr(vcpu, 3, ret);
vcpu->arch.hcall_needed = 0;
return RESUME_GUEST;
}
/*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
@@ -1178,6 +1296,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_XIRR_X:
#endif
case H_PAGE_INIT:
+ case H_RPT_INVALIDATE:
return 1;
}
@@ -1400,13 +1519,39 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
{
- /* hcall - punt to userspace */
int i;
- /* hypercall with MSR_PR has already been handled in rmode,
- * and never reaches here.
- */
+ if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1. This can only be
+ * reached by the P9 path because the old path
+ * handles this case in realmode hcall handlers.
+ */
+ if (!kvmhv_vcpu_is_radix(vcpu)) {
+ /*
+ * A guest could be running PR KVM, so this
+ * may be a PR KVM hcall. It must be reflected
+ * to the guest kernel as a sc interrupt.
+ */
+ kvmppc_core_queue_syscall(vcpu);
+ } else {
+ /*
+ * Radix guests can not run PR KVM or nested HV
+ * hash guests which might run PR KVM, so this
+ * is always a privilege fault. Send a program
+ * check to guest kernel.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ }
+ r = RESUME_GUEST;
+ break;
+ }
+ /*
+ * hcall - gather args and set exit_reason. This will next be
+ * handled by kvmppc_pseries_do_hcall which may be able to deal
+ * with it and resume guest, or may punt to userspace.
+ */
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -1419,20 +1564,102 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* We get these next two if the guest accesses a page which it thinks
* it has mapped but which is not actually present, either because
* it is for an emulated I/O device or because the corresonding
- * host page has been paged out. Any other HDSI/HISI interrupts
- * have been handled already.
+ * host page has been paged out.
+ *
+ * Any other HDSI/HISI interrupts have been handled already for P7/8
+ * guests. For POWER9 hash guests not using rmhandlers, basic hash
+ * fault handling is done here.
*/
- case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- r = RESUME_PAGE_FAULT;
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+ unsigned long vsid;
+ long err;
+
+ if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
+ r = RESUME_GUEST; /* Just retry if it's the canary */
+ break;
+ }
+
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_DR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, true);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1 || err == -2) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_data_storage(vcpu,
+ vcpu->arch.fault_dar, err);
+ r = RESUME_GUEST;
+ }
break;
- case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ }
+ case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+ unsigned long vsid;
+ long err;
+
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
DSISR_SRR1_MATCH_64S;
- if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
- vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
- r = RESUME_PAGE_FAULT;
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+ kvmppc_core_queue_inst_storage(vcpu,
+ vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(vcpu->arch.shregs.msr & MSR_IR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, false);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_inst_storage(vcpu, err);
+ r = RESUME_GUEST;
+ }
break;
+ }
+
/*
* This occurs if the guest executes an illegal instruction.
* If the guest debug is disabled, generate a program interrupt
@@ -1593,6 +1820,23 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
if (!xics_on_xive())
kvmppc_xics_rm_complete(vcpu, 0);
break;
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /*
+ * The H_RPT_INVALIDATE hcalls issued by nested
+ * guests for process-scoped invalidations when
+ * GTSE=0, are handled here in L0.
+ */
+ if (req == H_RPT_INVALIDATE) {
+ r = kvmppc_nested_h_rpt_invalidate(vcpu);
+ break;
+ }
+
+ r = RESUME_HOST;
+ break;
+ }
default:
r = RESUME_HOST;
break;
@@ -1654,6 +1898,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
lpcr &= ~LPCR_AIL;
if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+ /*
+ * On some POWER9s we force AIL off for radix guests to prevent
+ * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
+ * guest, which can result in Q0 translations with LPID=0 PID=PIDR to
+ * be cached, which the host TLB management does not expect.
+ */
+ if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ lpcr &= ~LPCR_AIL;
/*
* On POWER9, allow userspace to enable large decrementer for the
@@ -2233,7 +2485,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*/
static int threads_per_vcore(struct kvm *kvm)
{
- if (kvm->arch.threads_indep)
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return 1;
return threads_per_subcore;
}
@@ -2657,7 +2909,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
cpumask_t *cpu_in_guest;
int i;
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
if (nested) {
cpumask_set_cpu(cpu, &nested->need_tlb_flush);
cpu_in_guest = &nested->cpu_in_guest;
@@ -2671,9 +2923,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
* the other side is the first smp_mb() in kvmppc_run_core().
*/
smp_mb();
- for (i = 0; i < threads_per_core; ++i)
- if (cpumask_test_cpu(cpu + i, cpu_in_guest))
- smp_call_function_single(cpu + i, do_nothing, NULL, 1);
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step())
+ if (cpumask_test_cpu(i, cpu_in_guest))
+ smp_call_function_single(i, do_nothing, NULL, 1);
}
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
@@ -2704,8 +2957,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
*/
if (prev_cpu != pcpu) {
if (prev_cpu >= 0 &&
- cpu_first_thread_sibling(prev_cpu) !=
- cpu_first_thread_sibling(pcpu))
+ cpu_first_tlb_thread_sibling(prev_cpu) !=
+ cpu_first_tlb_thread_sibling(pcpu))
radix_flush_cpu(kvm, prev_cpu, vcpu);
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
@@ -2967,9 +3220,6 @@ static void prepare_threads(struct kvmppc_vcore *vc)
for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
- else if (no_mixing_hpt_and_radix &&
- kvm_is_radix(vc->kvm) != radix_enabled())
- vcpu->arch.ret = -EINVAL;
else if (vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending)
@@ -3176,6 +3426,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int trap;
bool is_power8;
+ if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+ return;
+
/*
* Remove from the list any threads that have a signal pending
* or need a VPA update done
@@ -3203,9 +3456,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
- * On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host machine where the
- * CPU threads may not be in different MMU modes.
*/
if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
@@ -3230,18 +3480,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
collect_piggybacks(&core_info, target_threads);
/*
- * On radix, arrange for TLB flushing if necessary.
- * This has to be done before disabling interrupts since
- * it uses smp_call_function().
- */
- pcpu = smp_processor_id();
- if (kvm_is_radix(vc->kvm)) {
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, core_info.vc[sub])
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
- }
-
- /*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
* and return without going into the guest(s).
@@ -3273,8 +3511,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cmd_bit = stat_bit = 0;
split = core_info.n_subcores;
sip = NULL;
- is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
- && !cpu_has_feature(CPU_FTR_ARCH_300);
+ is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
if (split > 1) {
sip = &split_info;
@@ -3478,184 +3715,113 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
-/*
- * Load up hypervisor-mode registers on P9.
- */
-static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
- unsigned long lpcr)
+static void load_spr_state(struct kvm_vcpu *vcpu)
{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- s64 hdec;
- u64 tb, purr, spurr;
- int trap;
- unsigned long host_hfscr = mfspr(SPRN_HFSCR);
- unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
- unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
- unsigned long host_psscr = mfspr(SPRN_PSSCR);
- unsigned long host_pidr = mfspr(SPRN_PID);
- unsigned long host_dawr1 = 0;
- unsigned long host_dawrx1 = 0;
-
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- host_dawr1 = mfspr(SPRN_DAWR1);
- host_dawrx1 = mfspr(SPRN_DAWRX1);
- }
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ mtspr(SPRN_WORT, vcpu->arch.wort);
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
/*
- * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
- * so set HDICE before writing HDEC.
+ * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+ * clear (or hstate set appropriately to catch those registers
+ * being clobbered if we take a MCE or SRESET), so those are done
+ * later.
*/
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
- isync();
-
- hdec = time_limit - mftb();
- if (hdec < 0) {
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
- isync();
- return BOOK3S_INTERRUPT_HV_DECREMENTER;
- }
- mtspr(SPRN_HDEC, hdec);
-
- if (vc->tb_offset) {
- u64 new_tb = mftb() + vc->tb_offset;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = vc->tb_offset;
- }
-
- if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
- mtspr(SPRN_DPDES, vc->dpdes);
- mtspr(SPRN_VTB, vc->vtb);
-
- local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
- local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, vcpu->arch.purr);
- mtspr(SPRN_SPURR, vcpu->arch.spurr);
-
- if (dawr_enabled()) {
- mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
- mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
- mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
- }
- }
- mtspr(SPRN_CIABR, vcpu->arch.ciabr);
- mtspr(SPRN_IC, vcpu->arch.ic);
- mtspr(SPRN_PID, vcpu->arch.pid);
- mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
-
- mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
-
- mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
- mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
- mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
- mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
- mtspr(SPRN_AMOR, ~0UL);
-
- mtspr(SPRN_LPCR, lpcr);
- isync();
-
- kvmppc_xive_push_vcpu(vcpu);
-
- mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
- mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
- trap = __kvmhv_vcpu_entry_p9(vcpu);
-
- /* Advance host PURR/SPURR by the amount used by guest */
- purr = mfspr(SPRN_PURR);
- spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
- purr - vcpu->arch.purr);
- mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
- spurr - vcpu->arch.spurr);
- vcpu->arch.purr = purr;
- vcpu->arch.spurr = spurr;
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+}
- vcpu->arch.ic = mfspr(SPRN_IC);
- vcpu->arch.pid = mfspr(SPRN_PID);
- vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
- vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
- vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
- vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ vcpu->arch.wort = mfspr(SPRN_WORT);
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+}
- /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
- mtspr(SPRN_PSSCR, host_psscr |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, host_hfscr);
- mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR0, host_dawr0);
- mtspr(SPRN_DAWRX0, host_dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, host_dawr1);
- mtspr(SPRN_DAWRX1, host_dawrx1);
- }
- mtspr(SPRN_PID, host_pidr);
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+ unsigned long dscr;
+ unsigned long tidr;
+ unsigned long iamr;
+ unsigned long amr;
+ unsigned long fscr;
+};
- /*
- * Since this is radix, do a eieio; tlbsync; ptesync sequence in
- * case we interrupted the guest between a tlbie and a ptesync.
- */
- asm volatile("eieio; tlbsync; ptesync");
+static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+ host_os_sprs->dscr = mfspr(SPRN_DSCR);
+ host_os_sprs->tidr = mfspr(SPRN_TIDR);
+ host_os_sprs->iamr = mfspr(SPRN_IAMR);
+ host_os_sprs->amr = mfspr(SPRN_AMR);
+ host_os_sprs->fscr = mfspr(SPRN_FSCR);
+}
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- asm volatile(PPC_CP_ABORT);
+/* vcpu guest regs must already be saved */
+static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ mtspr(SPRN_PSPB, 0);
+ mtspr(SPRN_WORT, 0);
+ mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
- isync();
+ mtspr(SPRN_DSCR, host_os_sprs->dscr);
+ mtspr(SPRN_TIDR, host_os_sprs->tidr);
+ mtspr(SPRN_IAMR, host_os_sprs->iamr);
- vc->dpdes = mfspr(SPRN_DPDES);
- vc->vtb = mfspr(SPRN_VTB);
- mtspr(SPRN_DPDES, 0);
- if (vc->pcr)
- mtspr(SPRN_PCR, PCR_MASK);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_os_sprs->amr);
- if (vc->tb_offset_applied) {
- u64 new_tb = mftb() - vc->tb_offset_applied;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = 0;
- }
+ if (host_os_sprs->fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, host_os_sprs->fscr);
- mtspr(SPRN_HDEC, 0x7fffffff);
- mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+ /* Save guest CTRL register, set runlatch to 1 */
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 1);
+}
- return trap;
+static inline bool hcall_is_xics(unsigned long req)
+{
+ return req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
}
/*
- * Virtual-mode guest entry for POWER9 and later when the host and
- * guest are both using the radix MMU. The LPIDR has already been set.
+ * Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- unsigned long host_dscr = mfspr(SPRN_DSCR);
- unsigned long host_tidr = mfspr(SPRN_TIDR);
- unsigned long host_iamr = mfspr(SPRN_IAMR);
- unsigned long host_amr = mfspr(SPRN_AMR);
- unsigned long host_fscr = mfspr(SPRN_FSCR);
+ struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb;
int trap, save_pmu;
+ WARN_ON_ONCE(vcpu->arch.ceded);
+
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 0)
@@ -3664,7 +3830,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (local_paca->kvm_hstate.dec_expires < time_limit)
time_limit = local_paca->kvm_hstate.dec_expires;
- vcpu->arch.ceded = 0;
+ save_p9_host_os_sprs(&host_os_sprs);
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
@@ -3693,24 +3859,20 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
- mtspr(SPRN_DSCR, vcpu->arch.dscr);
- mtspr(SPRN_IAMR, vcpu->arch.iamr);
- mtspr(SPRN_PSPB, vcpu->arch.pspb);
- mtspr(SPRN_FSCR, vcpu->arch.fscr);
- mtspr(SPRN_TAR, vcpu->arch.tar);
- mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
- mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
- mtspr(SPRN_BESCR, vcpu->arch.bescr);
- mtspr(SPRN_WORT, vcpu->arch.wort);
- mtspr(SPRN_TIDR, vcpu->arch.tid);
- mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
- mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
- mtspr(SPRN_AMR, vcpu->arch.amr);
- mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+ load_spr_state(vcpu);
+ /*
+ * When setting DEC, we must always deal with irq_work_raise via NMI vs
+ * setting DEC. The problem occurs right as we switch into guest mode
+ * if a NMI hits and sets pending work and sets DEC, then that will
+ * apply to the guest and not bring us back to the host.
+ *
+ * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
+ * example) and set HDEC to 1? That wouldn't solve the nested hv
+ * case which needs to abort the hcall or zero the time limit.
+ *
+ * XXX: Another day's problem.
+ */
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
@@ -3718,7 +3880,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
- * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ * this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
@@ -3738,6 +3900,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
hvregs.vcpu_token = vcpu->vcpu_id;
}
hvregs.hdec_expiry = time_limit;
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
@@ -3750,15 +3914,41 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
- kvmppc_nested_cede(vcpu);
+ kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
- trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ kvmppc_xive_push_vcpu(vcpu);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /* H_CEDE has to be handled now, not later */
+ if (req == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+
+ /* XICS hcalls must be handled before xive is pulled */
+ } else if (hcall_is_xics(req)) {
+ int ret;
+
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ if (ret != H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ }
+ kvmppc_xive_pull_vcpu(vcpu);
+
+ if (kvm_is_radix(vcpu->kvm))
+ vcpu->arch.slb_max = 0;
}
- vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
@@ -3766,36 +3956,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
- /* Save guest CTRL register, set runlatch to 1 */
- vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
-
- vcpu->arch.iamr = mfspr(SPRN_IAMR);
- vcpu->arch.pspb = mfspr(SPRN_PSPB);
- vcpu->arch.fscr = mfspr(SPRN_FSCR);
- vcpu->arch.tar = mfspr(SPRN_TAR);
- vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
- vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
- vcpu->arch.bescr = mfspr(SPRN_BESCR);
- vcpu->arch.wort = mfspr(SPRN_WORT);
- vcpu->arch.tid = mfspr(SPRN_TIDR);
- vcpu->arch.amr = mfspr(SPRN_AMR);
- vcpu->arch.uamor = mfspr(SPRN_UAMOR);
- vcpu->arch.dscr = mfspr(SPRN_DSCR);
- mtspr(SPRN_PSPB, 0);
- mtspr(SPRN_WORT, 0);
- mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_DSCR, host_dscr);
- mtspr(SPRN_TIDR, host_tidr);
- mtspr(SPRN_IAMR, host_iamr);
+ store_spr_state(vcpu);
- if (host_amr != vcpu->arch.amr)
- mtspr(SPRN_AMR, host_amr);
-
- if (host_fscr != vcpu->arch.fscr)
- mtspr(SPRN_FSCR, host_fscr);
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
@@ -3825,6 +3989,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vc->in_guest = 0;
mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+ /* We may have raced with new irq work */
+ if (test_irq_work_pending())
+ set_dec(1);
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
@@ -3925,7 +4092,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
cur = start_poll = ktime_get();
if (vc->halt_poll_ns) {
ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
- ++vc->runner->stat.halt_attempted_poll;
+ ++vc->runner->stat.generic.halt_attempted_poll;
vc->vcore_state = VCORE_POLLING;
spin_unlock(&vc->lock);
@@ -3936,13 +4103,13 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
break;
}
cur = ktime_get();
- } while (single_task_running() && ktime_before(cur, stop));
+ } while (kvm_vcpu_can_poll(cur, stop));
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
if (!do_sleep) {
- ++vc->runner->stat.halt_successful_poll;
+ ++vc->runner->stat.generic.halt_successful_poll;
goto out;
}
}
@@ -3954,7 +4121,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
- ++vc->runner->stat.halt_successful_poll;
+ ++vc->runner->stat.generic.halt_successful_poll;
goto out;
}
@@ -3981,13 +4148,13 @@ out:
ktime_to_ns(cur) - ktime_to_ns(start_wait);
/* Attribute failed poll time */
if (vc->halt_poll_ns)
- vc->runner->stat.halt_poll_fail_ns +=
+ vc->runner->stat.generic.halt_poll_fail_ns +=
ktime_to_ns(start_wait) -
ktime_to_ns(start_poll);
} else {
/* Attribute successful poll time */
if (vc->halt_poll_ns)
- vc->runner->stat.halt_poll_success_ns +=
+ vc->runner->stat.generic.halt_poll_success_ns +=
ktime_to_ns(cur) -
ktime_to_ns(start_poll);
}
@@ -4014,7 +4181,6 @@ out:
/*
* This never fails for a radix guest, as none of the operations it does
* for a radix guest can fail or have a way to report failure.
- * kvmhv_run_single_vcpu() relies on this fact.
*/
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
@@ -4170,7 +4336,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
{
struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
- int srcu_idx, lpid;
+ int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4193,8 +4359,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->runner = vcpu;
/* See if the MMU is ready to go */
- if (!kvm->arch.mmu_ready)
- kvmhv_setup_mmu(vcpu);
+ if (!kvm->arch.mmu_ready) {
+ r = kvmhv_setup_mmu(vcpu);
+ if (r) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ return r;
+ }
+ }
if (need_resched())
cond_resched();
@@ -4207,7 +4380,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ if (kvm_is_radix(kvm))
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
@@ -4244,13 +4418,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
- mtspr(SPRN_LPID, lpid);
- isync();
- kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
- }
-
guest_enter_irqoff();
srcu_idx = srcu_read_lock(&kvm->srcu);
@@ -4269,11 +4436,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
srcu_read_unlock(&kvm->srcu, srcu_idx);
- if (cpu_has_feature(CPU_FTR_HVMODE)) {
- mtspr(SPRN_LPID, kvm->arch.host_lpid);
- isync();
- }
-
set_irq_happened(trap);
kvmppc_set_host_core(pcpu);
@@ -4419,19 +4581,23 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
- /*
- * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
- * path, which also handles hash and dependent threads mode.
- */
- if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
- !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
r = kvmppc_run_vcpu(vcpu);
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * These should have been caught reflected
+ * into the guest by now. Final sanity check:
+ * don't allow userspace to execute hcalls in
+ * the hypervisor.
+ */
+ r = RESUME_GUEST;
+ continue;
+ }
trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
trace_kvm_hcall_exit(vcpu, r);
@@ -4455,7 +4621,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
mtspr(SPRN_EBBRR, ebb_regs[1]);
mtspr(SPRN_BESCR, ebb_regs[2]);
mtspr(SPRN_TAR, user_tar);
- mtspr(SPRN_FSCR, current->thread.fscr);
}
mtspr(SPRN_VRSAVE, user_vrsave);
@@ -4759,8 +4924,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
mmap_read_lock(kvm->mm);
- vma = find_vma(kvm->mm, hva);
- if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+ vma = vma_lookup(kvm->mm, hva);
+ if (!vma || (vma->vm_flags & VM_IO))
goto up_out;
psize = vma_kernel_pagesize(vma);
@@ -5039,18 +5204,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
- * On POWER9, we only need to do this if the "indep_threads_mode"
- * module parameter has been set to N.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
- pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
- kvm->arch.threads_indep = true;
- } else {
- kvm->arch.threads_indep = indep_threads_mode;
- }
- }
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
@@ -5091,7 +5246,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
debugfs_remove_recursive(kvm->arch.debugfs_dir);
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
@@ -5512,7 +5667,9 @@ static int kvmhv_enable_nested(struct kvm *kvm)
{
if (!nested)
return -EPERM;
- if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+ if (!radix_enabled())
return -ENODEV;
/* kvm == NULL means the caller is testing if the capability exists */
@@ -5675,11 +5832,25 @@ static int kvmhv_enable_dawr1(struct kvm *kvm)
static bool kvmppc_hash_v3_possible(void)
{
- if (radix_enabled() && no_mixing_hpt_and_radix)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return false;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
return false;
- return cpu_has_feature(CPU_FTR_ARCH_300) &&
- cpu_has_feature(CPU_FTR_HVMODE);
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ if (radix_enabled()) {
+ unsigned int pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) == PVR_POWER9 &&
+ (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+ ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+ return false;
+ }
+
+ return true;
}
static struct kvmppc_ops kvm_ops_hv = {
@@ -5823,18 +5994,6 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
- /*
- * POWER9 chips before version 2.02 can't have some threads in
- * HPT mode and some in radix mode on the same core.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- unsigned int pvr = mfspr(SPRN_PVR);
- if ((pvr >> 16) == PVR_POWER9 &&
- (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
- ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
- no_mixing_hpt_and_radix = true;
- }
-
r = kvmppc_uvmem_init();
if (r < 0)
pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7a0e33a9c980..be8ef1c5b1bf 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -35,21 +35,6 @@
#include "book3s_xive.h"
/*
- * The XIVE module will populate these when it loads
- */
-unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
- unsigned long mfrr);
-int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
-EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
-
-/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
*/
@@ -196,16 +181,9 @@ int kvmppc_hwrng_present(void)
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
-long kvmppc_h_random(struct kvm_vcpu *vcpu)
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- int r;
-
- /* Only need to do the expensive mfmsr() on radix */
- if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
- r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
- else
- r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
- if (r)
+ if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
@@ -221,15 +199,6 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
- /* For a nested hypervisor, use the XICS via hcall */
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
- IPI_PRIORITY);
- return;
- }
-
/* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
@@ -442,19 +411,12 @@ static long kvmppc_read_one_intr(bool *again)
return 1;
/* Now read the interrupt from the ICP */
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
- xirr = cpu_to_be32(retbuf[0]);
- } else {
- xics_phys = local_paca->kvm_hstate.xics_phys;
- rc = 0;
- if (!xics_phys)
- rc = opal_int_get_xirr(&xirr, false);
- else
- xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
- }
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ rc = 0;
+ if (!xics_phys)
+ rc = opal_int_get_xirr(&xirr, false);
+ else
+ xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
if (rc < 0)
return 1;
@@ -483,13 +445,7 @@ static long kvmppc_read_one_intr(bool *again)
*/
if (xisr == XICS_IPI) {
rc = 0;
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf,
- hard_smp_processor_id(), 0xff);
- plpar_hcall_raw(H_EOI, retbuf, h_xirr);
- } else if (xics_phys) {
+ if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else {
@@ -515,13 +471,7 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- plpar_hcall_raw(H_IPI, retbuf,
- hard_smp_processor_id(),
- IPI_PRIORITY);
- } else if (xics_phys)
+ if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR);
else
@@ -541,22 +491,13 @@ static long kvmppc_read_one_intr(bool *again)
}
#ifdef CONFIG_KVM_XICS
-static inline bool is_rm(void)
-{
- return !(mfmsr() & MSR_DR);
-}
-
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_xirr(vcpu);
- if (unlikely(!__xive_vm_h_xirr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_xirr(vcpu);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_xirr(vcpu);
+ else
return xics_rm_h_xirr(vcpu);
}
@@ -565,13 +506,9 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
vcpu->arch.regs.gpr[5] = get_tb();
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_xirr(vcpu);
- if (unlikely(!__xive_vm_h_xirr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_xirr(vcpu);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_xirr(vcpu);
+ else
return xics_rm_h_xirr(vcpu);
}
@@ -579,13 +516,9 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_ipoll(vcpu, server);
- if (unlikely(!__xive_vm_h_ipoll))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_ipoll(vcpu, server);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_ipoll(vcpu, server);
+ else
return H_TOO_HARD;
}
@@ -594,13 +527,9 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_ipi(vcpu, server, mfrr);
- if (unlikely(!__xive_vm_h_ipi))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_ipi(vcpu, server, mfrr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_ipi(vcpu, server, mfrr);
+ else
return xics_rm_h_ipi(vcpu, server, mfrr);
}
@@ -608,13 +537,9 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_cppr(vcpu, cppr);
- if (unlikely(!__xive_vm_h_cppr))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_cppr(vcpu, cppr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_cppr(vcpu, cppr);
+ else
return xics_rm_h_cppr(vcpu, cppr);
}
@@ -622,13 +547,9 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
- if (xics_on_xive()) {
- if (is_rm())
- return xive_rm_h_eoi(vcpu, xirr);
- if (unlikely(!__xive_vm_h_eoi))
- return H_NOT_AVAILABLE;
- return __xive_vm_h_eoi(vcpu, xirr);
- } else
+ if (xics_on_xive())
+ return xive_rm_h_eoi(vcpu, xirr);
+ else
return xics_rm_h_eoi(vcpu, xirr);
}
#endif /* CONFIG_KVM_XICS */
@@ -800,7 +721,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
* Thus we make all 4 threads use the same bit.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu = cpu_first_thread_sibling(pcpu);
+ pcpu = cpu_first_tlb_thread_sibling(pcpu);
if (nested)
need_tlb_flush = &nested->need_tlb_flush;
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 327417d79eac..4444f83cb133 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/*
* Put whatever is in the decrementer into the
* hypervisor decrementer.
- * Because of a hardware deviation in P8 and P9,
+ * Because of a hardware deviation in P8,
* we need to set LPCR[HDICE] before writing HDEC.
*/
ld r5, HSTATE_KVM_VCORE(r13)
@@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ori r8, r9, LPCR_HDICE
mtspr SPRN_LPCR, r8
isync
- andis. r0, r9, LPCR_LD@h
mfspr r8,SPRN_DEC
mftb r7
-BEGIN_FTR_SECTION
- /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
- bne 32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
-32: mtspr SPRN_HDEC,r8
+ mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 60724f674421..8543ad538b0c 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
static struct patb_entry *pseries_partition_tb;
@@ -53,7 +54,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
hr->dawrx1 = vcpu->arch.dawrx1;
}
-static void byteswap_pt_regs(struct pt_regs *regs)
+/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
{
unsigned long *addr = (unsigned long *) regs;
@@ -467,8 +469,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
return;
}
- rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
- lpid, TLBIEL_INVAL_SET_LPID);
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ else
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PAT,
+ H_RPTI_PAGE_ALL, 0, -1UL);
if (rc)
pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}
@@ -1214,6 +1223,113 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
return H_SUCCESS;
}
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+ unsigned long lpid, unsigned long ric)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *gp;
+
+ gp = kvmhv_get_nested(kvm, lpid, false);
+ if (gp) {
+ kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+ kvmhv_put_nested(gp);
+ }
+ return H_SUCCESS;
+}
+
+/*
+ * Number of pages above which we invalidate the entire LPID rather than
+ * flush individual pages.
+ */
+static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
+
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+ unsigned long lpid,
+ unsigned long pg_sizes,
+ unsigned long start,
+ unsigned long end)
+{
+ int ret = H_P4;
+ unsigned long addr, nr_pages;
+ struct mmu_psize_def *def;
+ unsigned long psize, ap, page_size;
+ bool flush_lpid;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
+ if (flush_lpid)
+ return do_tlb_invalidate_nested_all(vcpu, lpid,
+ RIC_FLUSH_TLB);
+ addr = start;
+ ap = mmu_get_ap(psize);
+ page_size = 1UL << def->shift;
+ do {
+ ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+ get_epn(addr));
+ if (ret)
+ return H_P4;
+ addr += page_size;
+ } while (addr < end);
+ }
+ return ret;
+}
+
+/*
+ * Performs partition-scoped invalidations for nested guests
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ /*
+ * If L2 lpid isn't valid, we need to return H_PARAMETER.
+ *
+ * However, nested KVM issues a L2 lpid flush call when creating
+ * partition table entries for L2. This happens even before the
+ * corresponding shadow lpid is created in HV which happens in
+ * H_ENTER_NESTED call. Since we can't differentiate this case from
+ * the invalid case, we ignore such flush requests and return success.
+ */
+ if (!kvmhv_find_nested(vcpu->kvm, lpid))
+ return H_SUCCESS;
+
+ /*
+ * A flush all request can be handled by a full lpid flush only.
+ */
+ if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+ return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
+
+ /*
+ * We don't need to handle a PWC flush like process table here,
+ * because intermediate partition scoped table in nested guest doesn't
+ * really have PWC. Only level we have PWC is in L0 and for nested
+ * invalidate at L0 we always do kvm_flush_lpid() which does
+ * radix__flush_all_lpid(). For range invalidate at any level, we
+ * are not removing the higher level page tables and hence there is
+ * no PWC invalidate needed.
+ *
+ * if (type & H_RPTI_TYPE_PWC) {
+ * ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
+ * if (ret)
+ * return H_P4;
+ * }
+ */
+
+ if (start == 0 && end == -1)
+ return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
+
+ if (type & H_RPTI_TYPE_TLB)
+ return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
+ start, end);
+ return H_SUCCESS;
+}
+
/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
struct kvm_nested_guest *gp,
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
new file mode 100644
index 000000000000..83f592eadcd2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 tb = mftb() - vc->tb_offset_applied;
+
+ vcpu->arch.cur_activity = next;
+ vcpu->arch.cur_tb_start = tb;
+}
+
+static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmhv_tb_accumulator *curr;
+ u64 tb = mftb() - vc->tb_offset_applied;
+ u64 prev_tb;
+ u64 delta;
+ u64 seq;
+
+ curr = vcpu->arch.cur_activity;
+ vcpu->arch.cur_activity = next;
+ prev_tb = vcpu->arch.cur_tb_start;
+ vcpu->arch.cur_tb_start = tb;
+
+ if (!curr)
+ return;
+
+ delta = tb - prev_tb;
+
+ seq = curr->seqcount;
+ curr->seqcount = seq + 1;
+ smp_wmb();
+ curr->tb_total += delta;
+ if (seq == 0 || delta < curr->tb_min)
+ curr->tb_min = delta;
+ if (delta > curr->tb_max)
+ curr->tb_max = delta;
+ smp_wmb();
+ curr->seqcount = seq + 2;
+}
+
+#define start_timing(vcpu, next) __start_timing(vcpu, next)
+#define end_timing(vcpu) __start_timing(vcpu, NULL)
+#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
+#else
+#define start_timing(vcpu, next) do {} while (0)
+#define end_timing(vcpu) do {} while (0)
+#define accumulate_time(vcpu, next) do {} while (0)
+#endif
+
+static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+{
+ asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
+ asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
+}
+
+static inline void mtslb(u64 slbee, u64 slbev)
+{
+ asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
+static inline void clear_slb_entry(unsigned int idx)
+{
+ mtslb(idx, 0);
+}
+
+static inline void slb_clear_invalidate_partition(void)
+{
+ clear_slb_entry(0);
+ asm volatile(PPC_SLBIA(6));
+}
+
+/*
+ * Malicious or buggy radix guests may have inserted SLB entries
+ * (only 0..3 because radix always runs with UPRT=1), so these must
+ * be cleared here to avoid side-channels. slbmte is used rather
+ * than slbia, as it won't clear cached translations.
+ */
+static void radix_clear_slb(void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ clear_slb_entry(i);
+}
+
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u32 lpid;
+
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+
+ /*
+ * All the isync()s are overkill but trivially follow the ISA
+ * requirements. Some can likely be replaced with justification
+ * comment for why they are not needed.
+ */
+ isync();
+ mtspr(SPRN_LPID, lpid);
+ isync();
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+ mtspr(SPRN_PID, vcpu->arch.pid);
+ isync();
+}
+
+static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ u32 lpid;
+ int i;
+
+ lpid = kvm->arch.lpid;
+
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, vcpu->arch.pid);
+
+ for (i = 0; i < vcpu->arch.slb_max; i++)
+ mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
+
+ isync();
+}
+
+static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
+{
+ isync();
+ mtspr(SPRN_PID, pid);
+ isync();
+ mtspr(SPRN_LPID, kvm->arch.host_lpid);
+ isync();
+ mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+ isync();
+
+ if (!radix_enabled())
+ slb_restore_bolted_realmode();
+}
+
+static void save_clear_host_mmu(struct kvm *kvm)
+{
+ if (!radix_enabled()) {
+ /*
+ * Hash host could save and restore host SLB entries to
+ * reduce SLB fault overheads of VM exits, but for now the
+ * existing code clears all entries and restores just the
+ * bolted ones when switching back to host.
+ */
+ slb_clear_invalidate_partition();
+ }
+}
+
+static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+ if (kvm_is_radix(kvm)) {
+ radix_clear_slb();
+ } else {
+ int i;
+ int nr = 0;
+
+ /*
+ * This must run before switching to host (radix host can't
+ * access all SLBs).
+ */
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ u64 slbee, slbev;
+ mfslb(i, &slbee, &slbev);
+ if (slbee & SLB_ESID_V) {
+ vcpu->arch.slb[nr].orige = slbee | i;
+ vcpu->arch.slb[nr].origv = slbev;
+ nr++;
+ }
+ }
+ vcpu->arch.slb_max = nr;
+ slb_clear_invalidate_partition();
+ }
+}
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ s64 hdec;
+ u64 tb, purr, spurr;
+ u64 *exsave;
+ bool ri_set;
+ int trap;
+ unsigned long msr;
+ unsigned long host_hfscr;
+ unsigned long host_ciabr;
+ unsigned long host_dawr0;
+ unsigned long host_dawrx0;
+ unsigned long host_psscr;
+ unsigned long host_pidr;
+ unsigned long host_dawr1;
+ unsigned long host_dawrx1;
+
+ hdec = time_limit - mftb();
+ if (hdec < 0)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+
+ WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+ WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+ start_timing(vcpu, &vcpu->arch.rm_entry);
+
+ vcpu->arch.ceded = 0;
+
+ if (vc->tb_offset) {
+ u64 new_tb = mftb() + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ msr = mfmsr();
+
+ host_hfscr = mfspr(SPRN_HFSCR);
+ host_ciabr = mfspr(SPRN_CIABR);
+ host_dawr0 = mfspr(SPRN_DAWR0);
+ host_dawrx0 = mfspr(SPRN_DAWRX0);
+ host_psscr = mfspr(SPRN_PSSCR);
+ host_pidr = mfspr(SPRN_PID);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }
+
+ if (vc->pcr)
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+ mtspr(SPRN_DPDES, vc->dpdes);
+ mtspr(SPRN_VTB, vc->vtb);
+
+ local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+ local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, vcpu->arch.purr);
+ mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+ if (dawr_enabled()) {
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+ mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+ }
+ }
+ mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+ mtspr(SPRN_IC, vcpu->arch.ic);
+
+ mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+ mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+ /*
+ * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
+ * Interrupt (HDSI) the HDSISR is not be updated at all.
+ *
+ * To work around this we put a canary value into the HDSISR before
+ * returning to a guest and then check for this canary when we take a
+ * HDSI. If we find the canary on a HDSI, we know the hardware didn't
+ * update the HDSISR. In this case we return to the guest to retake the
+ * HDSI which should correctly update the HDSISR the second time HDSI
+ * entry.
+ *
+ * Just do this on all p9 processors for now.
+ */
+ mtspr(SPRN_HDSISR, HDSISR_CANARY);
+
+ mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+ mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+ mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+ mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+ mtspr(SPRN_AMOR, ~0UL);
+
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
+
+ /*
+ * Hash host, hash guest, or radix guest with prefetch bug, all have
+ * to disable the MMU before switching to guest MMU state.
+ */
+ if (!radix_enabled() || !kvm_is_radix(kvm) ||
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
+ save_clear_host_mmu(kvm);
+
+ if (kvm_is_radix(kvm)) {
+ switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+ if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(0, 1); /* clear RI */
+
+ } else {
+ switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
+ }
+
+ /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
+ kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+
+ /*
+ * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+ * so set guest LPCR (with HDICE) before writing HDEC.
+ */
+ mtspr(SPRN_HDEC, hdec);
+
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+ mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+ accumulate_time(vcpu, &vcpu->arch.guest_time);
+
+ kvmppc_p9_enter_guest(vcpu);
+
+ accumulate_time(vcpu, &vcpu->arch.rm_intr);
+
+ /* XXX: Could get these from r11/12 and paca exsave instead */
+ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+ vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+ /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
+ trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+
+ /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
+ ri_set = false;
+ if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ if (trap != BOOK3S_INTERRUPT_SYSCALL &&
+ (vcpu->arch.shregs.msr & MSR_RI))
+ ri_set = true;
+ exsave = local_paca->exgen;
+ } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
+ exsave = local_paca->exnmi;
+ } else { /* trap == 0x200 */
+ exsave = local_paca->exmc;
+ }
+
+ vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+ vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+
+ /*
+ * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
+ * and hstate scratch (which we need to move into exsave to make
+ * re-entrant vs SRESET/MCE)
+ */
+ if (ri_set) {
+ if (unlikely(!(mfmsr() & MSR_RI))) {
+ __mtmsrd(MSR_RI, 1);
+ WARN_ON_ONCE(1);
+ }
+ } else {
+ WARN_ON_ONCE(mfmsr() & MSR_RI);
+ __mtmsrd(MSR_RI, 1);
+ }
+
+ vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+ vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+ vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+ vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+ vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+ vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+ vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+ vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+ vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+ if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ kvmppc_realmode_machine_check(vcpu);
+
+ } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
+ kvmppc_realmode_hmi_handler();
+
+ } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2. This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+ } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ /*
+ * The cases we want to handle here are those where the guest
+ * is in real suspend mode and is trying to transition to
+ * transactional mode.
+ */
+ if (local_paca->kvm_hstate.fake_suspend &&
+ (vcpu->arch.shregs.msr & MSR_TS_S)) {
+ if (kvmhv_p9_tm_emulation_early(vcpu)) {
+ /* Prevent it being handled again. */
+ trap = 0;
+ }
+ }
+#endif
+ }
+
+ accumulate_time(vcpu, &vcpu->arch.rm_exit);
+
+ /* Advance host PURR/SPURR by the amount used by guest */
+ purr = mfspr(SPRN_PURR);
+ spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+ purr - vcpu->arch.purr);
+ mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+ spurr - vcpu->arch.spurr);
+ vcpu->arch.purr = purr;
+ vcpu->arch.spurr = spurr;
+
+ vcpu->arch.ic = mfspr(SPRN_IC);
+ vcpu->arch.pid = mfspr(SPRN_PID);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+ vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+ vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+ vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+ vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ mtspr(SPRN_HFSCR, host_hfscr);
+ mtspr(SPRN_CIABR, host_ciabr);
+ mtspr(SPRN_DAWR0, host_dawr0);
+ mtspr(SPRN_DAWRX0, host_dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, host_dawr1);
+ mtspr(SPRN_DAWRX1, host_dawrx1);
+ }
+
+ if (kvm_is_radix(kvm)) {
+ /*
+ * Since this is radix, do a eieio; tlbsync; ptesync sequence
+ * in case we interrupted the guest between a tlbie and a
+ * ptesync.
+ */
+ asm volatile("eieio; tlbsync; ptesync");
+ }
+
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
+ vc->dpdes = mfspr(SPRN_DPDES);
+ vc->vtb = mfspr(SPRN_VTB);
+ mtspr(SPRN_DPDES, 0);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, PCR_MASK);
+
+ if (vc->tb_offset_applied) {
+ u64 new_tb = mftb() - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = 0;
+ }
+
+ mtspr(SPRN_HDEC, 0x7fffffff);
+
+ save_clear_guest_mmu(kvm, vcpu);
+ switch_mmu_to_host(kvm, host_pidr);
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
+
+ /*
+ * If we are in real mode, only switch MMU on after the MMU is
+ * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
+ */
+ __mtmsrd(msr, 0);
+
+ end_timing(vcpu);
+
+ return trap;
+}
+EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7af7c70f1468..632b2545072b 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -23,20 +23,9 @@
#include <asm/pte-walk.h>
/* Translate address of a vmalloc'd thing to a linear map address */
-static void *real_vmalloc_addr(void *x)
+static void *real_vmalloc_addr(void *addr)
{
- unsigned long addr = (unsigned long) x;
- pte_t *p;
- /*
- * assume we don't have huge pages in vmalloc space...
- * So don't worry about THP collapse/split. Called
- * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
- */
- p = find_init_mm_pte(addr, NULL);
- if (!p || !pte_present(*p))
- return NULL;
- addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
- return __va(addr);
+ return __va(ppc_find_vmap_phys((unsigned long)addr));
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
@@ -57,6 +46,10 @@ static int global_invalidates(struct kvm *kvm)
else
global = 1;
+ /* LPID has been switched to host if in virt mode so can't do local */
+ if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+ global = 1;
+
if (!global) {
/* any other core might now have stale TLB entries... */
smp_wmb();
@@ -67,7 +60,7 @@ static int global_invalidates(struct kvm *kvm)
* so use the bit for the first thread to represent the core.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300))
- cpu = cpu_first_thread_sibling(cpu);
+ cpu = cpu_first_tlb_thread_sibling(cpu);
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
}
@@ -409,6 +402,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
vcpu->arch.pgdir, true,
&vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -553,6 +547,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
&vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_remove);
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
@@ -671,6 +666,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn)
@@ -741,6 +737,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_protect);
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -781,6 +778,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
}
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_read);
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -829,6 +827,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -876,6 +875,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
unsigned long gpa, int writing, unsigned long *hpa,
@@ -1294,3 +1294,4 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
return -1; /* send fault up to host kernel mode */
}
+EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index c2c9c733f359..0a11ec88a0ae 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -141,13 +141,6 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
- if (xive_enabled() && kvmhv_on_pseries()) {
- /* No XICS access or hypercalls available, too hard */
- this_icp->rm_action |= XICS_RM_KICK_VCPU;
- this_icp->rm_kick_target = vcpu;
- return;
- }
-
/*
* Check if the core is loaded,
* if not, find an available host core to post to wake the VCPU,
@@ -771,14 +764,6 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys;
int64_t rc;
- if (kvmhv_on_pseries()) {
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
- iosync();
- plpar_hcall_raw(H_EOI, retbuf, hwirq);
- return;
- }
-
rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5e634db4809b..8dd437d7a2c6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -25,18 +25,10 @@
#include <asm/export.h>
#include <asm/tm.h>
#include <asm/opal.h>
-#include <asm/xive-regs.h>
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/cpuidle.h>
-#include <asm/ultravisor-api.h>
-
-/* Sign-extend HDEC if not on POWER9 */
-#define EXTEND_HDEC(reg) \
-BEGIN_FTR_SECTION; \
- extsw reg, reg; \
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
@@ -44,9 +36,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
-#define SFS 208
+#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
-#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
@@ -57,10 +48,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_HFSCR (SFS-72)
#define STACK_SLOT_AMR (SFS-80)
#define STACK_SLOT_UAMOR (SFS-88)
-#define STACK_SLOT_DAWR1 (SFS-96)
-#define STACK_SLOT_DAWRX1 (SFS-104)
-/* the following is used by the P9 short path */
-#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
+#define STACK_SLOT_FSCR (SFS-96)
/*
* Call kvmppc_hv_entry in real mode.
@@ -136,15 +124,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Return the trap number on this thread as the return value */
mr r3, r12
- /*
- * If we came back from the guest via a relocation-on interrupt,
- * we will be in virtual mode at this point, which makes it a
- * little easier to get back to the caller.
- */
- mfmsr r0
- andi. r0, r0, MSR_IR /* in real mode? */
- bne .Lvirt_return
-
/* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
@@ -154,11 +133,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtsrr1 r7
RFI_TO_KERNEL
- /* Virtual-mode return */
-.Lvirt_return:
- mtlr r8
- blr
-
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -245,7 +219,7 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
- EXTEND_HDEC(r0)
+ extsw r0, r0
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
cmpdi r0, 0
blt kvm_novcpu_exit
@@ -347,10 +321,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- LOAD_REG_ADDR(r6, decrementer_max)
- ld r6, 0(r6)
+ lis r6,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC, r6
-BEGIN_FTR_SECTION
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
@@ -362,7 +334,6 @@ BEGIN_FTR_SECTION
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
lwsync
@@ -433,7 +404,6 @@ kvm_no_guest:
blr
53:
-BEGIN_FTR_SECTION
HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r5, 0
@@ -448,14 +418,6 @@ BEGIN_FTR_SECTION
b kvm_unsplit_nap
60: HMT_MEDIUM
b kvm_secondary_got_guest
-FTR_SECTION_ELSE
- HMT_LOW
- ld r5, HSTATE_KVM_VCORE(r13)
- cmpdi r5, 0
- beq kvm_no_guest
- HMT_MEDIUM
- b kvm_secondary_got_guest
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13)
@@ -581,13 +543,11 @@ kvmppc_hv_entry:
bne 10f
lwz r7,KVM_LPID(r9)
-BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -668,16 +628,6 @@ kvmppc_got_guest:
/* Save host values of some registers */
BEGIN_FTR_SECTION
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- mfspr r7, SPRN_PID
- std r5, STACK_SLOT_TID(r1)
- std r6, STACK_SLOT_PSSCR(r1)
- std r7, STACK_SLOT_PID(r1)
- mfspr r5, SPRN_HFSCR
- std r5, STACK_SLOT_HFSCR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR0
mfspr r7, SPRN_DAWRX0
@@ -686,13 +636,9 @@ BEGIN_FTR_SECTION
std r6, STACK_SLOT_DAWR0(r1)
std r7, STACK_SLOT_DAWRX0(r1)
std r8, STACK_SLOT_IAMR(r1)
+ mfspr r5, SPRN_FSCR
+ std r5, STACK_SLOT_FSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r6, SPRN_DAWR1
- mfspr r7, SPRN_DAWRX1
- std r6, STACK_SLOT_DAWR1(r1)
- std r7, STACK_SLOT_DAWRX1(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
mfspr r5, SPRN_AMR
std r5, STACK_SLOT_AMR(r1)
@@ -710,13 +656,9 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -783,12 +725,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX0(r4)
mtspr SPRN_DAWR0, r5
mtspr SPRN_DAWRX0, r6
-BEGIN_FTR_SECTION
- ld r5, VCPU_DAWR1(r4)
- ld r6, VCPU_DAWRX1(r4)
- mtspr SPRN_DAWR1, r5
- mtspr SPRN_DAWRX1, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
@@ -806,7 +742,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
mtspr SPRN_BESCR, r6
mtspr SPRN_PID, r7
mtspr SPRN_WORT, r8
-BEGIN_FTR_SECTION
/* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
@@ -817,18 +752,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_CSIGR, r7
mtspr SPRN_TACR, r8
nop
-FTR_SECTION_ELSE
- /* POWER9-only registers */
- ld r5, VCPU_TID(r4)
- ld r6, VCPU_PSSCR(r4)
- lbz r8, HSTATE_FAKE_SUSPEND(r13)
- oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
- rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
- ld r7, VCPU_HFSCR(r4)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
ld r5, VCPU_SPRG0(r4)
@@ -898,23 +821,15 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- EXTEND_HDEC(r3)
+ extsw r3, r3
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
- ld r6, VCPU_KVM(r4)
- lbz r0, KVM_RADIX(r6)
- cmpwi r0, 0
- bne 9f
-
- /* For hash guest, clear out and reload the SLB */
-BEGIN_MMU_FTR_SECTION
- /* Radix host won't have populated the SLB, so no need to clear */
+ /* Clear out and reload the SLB */
li r6, 0
slbmte r6, r6
PPC_SLBIA(6)
ptesync
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
lwz r5,VCPU_SLB_MAX(r4)
@@ -929,96 +844,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
bdnz 1b
9:
-#ifdef CONFIG_KVM_XICS
- /* We are entering the guest on that thread, push VCPU to XIVE */
- ld r11, VCPU_XIVE_SAVED_STATE(r4)
- li r9, TM_QW1_OS
- lwz r8, VCPU_XIVE_CAM_WORD(r4)
- cmpwi r8, 0
- beq no_xive
- li r7, TM_QW1_OS + TM_WORD2
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdx r11,r9,r10
- stwx r8,r7,r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdcix r11,r9,r10
- stwcix r8,r7,r10
-3: li r9, 1
- stb r9, VCPU_XIVE_PUSHED(r4)
- eieio
-
- /*
- * We clear the irq_pending flag. There is a small chance of a
- * race vs. the escalation interrupt happening on another
- * processor setting it again, but the only consequence is to
- * cause a spurrious wakeup on the next H_CEDE which is not an
- * issue.
- */
- li r0,0
- stb r0, VCPU_IRQ_PENDING(r4)
-
- /*
- * In single escalation mode, if the escalation interrupt is
- * on, we mask it.
- */
- lbz r0, VCPU_XIVE_ESC_ON(r4)
- cmpwi cr1, r0,0
- beq cr1, 1f
- li r9, XIVE_ESB_SET_PQ_01
- beq 4f /* in real mode? */
- ld r10, VCPU_XIVE_ESC_VADDR(r4)
- ldx r0, r10, r9
- b 5f
-4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
- ldcix r0, r10, r9
-5: sync
-
- /* We have a possible subtle race here: The escalation interrupt might
- * have fired and be on its way to the host queue while we mask it,
- * and if we unmask it early enough (re-cede right away), there is
- * a theorical possibility that it fires again, thus landing in the
- * target queue more than once which is a big no-no.
- *
- * Fortunately, solving this is rather easy. If the above load setting
- * PQ to 01 returns a previous value where P is set, then we know the
- * escalation interrupt is somewhere on its way to the host. In that
- * case we simply don't clear the xive_esc_on flag below. It will be
- * eventually cleared by the handler for the escalation interrupt.
- *
- * Then, when doing a cede, we check that flag again before re-enabling
- * the escalation interrupt, and if set, we abort the cede.
- */
- andi. r0, r0, XIVE_ESB_VAL_P
- bne- 1f
-
- /* Now P is 0, we can clear the flag */
- li r0, 0
- stb r0, VCPU_XIVE_ESC_ON(r4)
-1:
-no_xive:
-#endif /* CONFIG_KVM_XICS */
-
- li r0, 0
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4)
-BEGIN_FTR_SECTION
- /* On POWER9, also check for emulated doorbell interrupt */
- lbz r3, VCPU_DBELL_REQ(r4)
- or r0, r0, r3
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpdi r0, 0
beq 71f
mr r3, r4
@@ -1030,7 +858,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
-fast_guest_entry_c:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
@@ -1092,18 +919,8 @@ BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-/* Move canary into DSISR to check for later */
-BEGIN_FTR_SECTION
- li r0, 0x7fff
- mtspr SPRN_HDSISR, r0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
- ld r6, VCPU_KVM(r4)
- lbz r7, KVM_SECURE_GUEST(r6)
- cmpdi r7, 0
ld r6, VCPU_GPR(R6)(r4)
ld r7, VCPU_GPR(R7)(r4)
- bne ret_to_ultra
ld r0, VCPU_CR(r4)
mtcr r0
@@ -1114,117 +931,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b .
-/*
- * Use UV_RETURN ultracall to return control back to the Ultravisor after
- * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
- * to the Hypervisor.
- *
- * All registers have already been loaded, except:
- * R0 = hcall result
- * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
- * R3 = UV_RETURN
- */
-ret_to_ultra:
- ld r0, VCPU_CR(r4)
- mtcr r0
-
- ld r0, VCPU_GPR(R3)(r4)
- mfspr r2, SPRN_SRR1
- li r3, 0
- ori r3, r3, UV_RETURN
- ld r4, VCPU_GPR(R4)(r4)
- sc 2
-
-/*
- * Enter the guest on a P9 or later system where we have exactly
- * one vcpu per vcore and we don't need to go to real mode
- * (which implies that host and guest are both using radix MMU mode).
- * r3 = vcpu pointer
- * Most SPRs and all the VSRs have been loaded already.
- */
-_GLOBAL(__kvmhv_vcpu_entry_p9)
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
- mflr r0
- std r0, PPC_LR_STKOFF(r1)
- stdu r1, -SFS(r1)
-
- li r0, 1
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
- std r3, HSTATE_KVM_VCPU(r13)
- mfcr r4
- stw r4, SFS+8(r1)
-
- std r1, HSTATE_HOST_R1(r13)
-
- reg = 14
- .rept 18
- std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, __VCPU_GPR(reg)(r3)
- reg = reg + 1
- .endr
-
- mfmsr r10
- std r10, HSTATE_HOST_MSR(r13)
-
- mr r4, r3
- b fast_guest_entry_c
-guest_exit_short_path:
- /*
- * Malicious or buggy radix guests may have inserted SLB entries
- * (only 0..3 because radix always runs with UPRT=1), so these must
- * be cleared here to avoid side-channels. slbmte is used rather
- * than slbia, as it won't clear cached translations.
- */
- li r0,0
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
-
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- reg = 14
- .rept 18
- std reg, __VCPU_GPR(reg)(r9)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- lwz r4, SFS+8(r1)
- mtcr r4
-
- mr r3, r12 /* trap number */
-
- addi r1, r1, SFS
- ld r0, PPC_LR_STKOFF(r1)
- mtlr r0
-
- /* If we are in real mode, do a rfid to get back to the caller */
- mfmsr r4
- andi. r5, r4, MSR_IR
- bnelr
- rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
- mtspr SPRN_SRR0, r0
- ld r10, HSTATE_HOST_MSR(r13)
- rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
- mtspr SPRN_SRR1, r10
- RFI_TO_KERNEL
- b .
secondary_too_late:
li r12, 0
@@ -1265,21 +971,16 @@ hdec_soon:
kvmppc_interrupt_hv:
/*
* Register contents:
+ * R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | interrupt vector
* R13 = PACA
* guest R12 saved in shadow VCPU SCRATCH0
* guest R13 saved in SPRN_SCRATCH0
+ * guest R9 saved in HSTATE_SCRATCH2
*/
- std r9, HSTATE_SCRATCH2(r13)
- lbz r9, HSTATE_IN_GUEST(r13)
- cmpwi r9, KVM_GUEST_MODE_HOST_HV
- beq kvmppc_bad_host_intr
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
- cmpwi r9, KVM_GUEST_MODE_GUEST
- ld r9, HSTATE_SCRATCH2(r13)
- beq kvmppc_interrupt_pr
-#endif
/* We're now back in the host but in guest MMU context */
+ cmpwi r9,KVM_GUEST_MODE_HOST_HV
+ beq kvmppc_bad_host_intr
li r9, KVM_GUEST_MODE_HOST_HV
stb r9, HSTATE_IN_GUEST(r13)
@@ -1397,7 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
mfspr r3,SPRN_HDEC
- EXTEND_HDEC(r3)
+ extsw r3, r3
cmpdi r3,0
mr r4,r9
bge fast_guest_return
@@ -1409,14 +1110,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
- /* always exit if we're running a nested guest */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
beq maybe_reenter_guest
@@ -1446,62 +1139,16 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
mr r4, r9
bl kvmhv_accumulate_time
#endif
-#ifdef CONFIG_KVM_XICS
- /* We are exiting, pull the VP from the XIVE */
- lbz r0, VCPU_XIVE_PUSHED(r9)
- cmpwi cr0, r0, 0
- beq 1f
- li r7, TM_SPC_PULL_OS_CTX
- li r6, TM_QW1_OS
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzx r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldx r11, r6, r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzcix r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldcix r11, r6, r10
-3: std r11, VCPU_XIVE_SAVED_STATE(r9)
- /* Fixup some of the state for the next load */
- li r10, 0
- li r0, 0xff
- stb r10, VCPU_XIVE_PUSHED(r9)
- stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
- stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
- eieio
-1:
-#endif /* CONFIG_KVM_XICS */
/*
* Possibly flush the link stack here, before we do a blr in
- * guest_exit_short_path.
+ * kvmhv_switch_to_host.
*/
1: nop
patch_site 1b patch__call_kvm_flush_link_stack
- /* If we came in through the P9 short path, go back out to C now */
- lwz r0, STACK_SLOT_SHORT_PATH(r1)
- cmpwi r0, 0
- bne guest_exit_short_path
-
/* For hash guest, read the guest SLB and save it away */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
li r5, 0
- cmpwi r0, 0
- bne 0f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
@@ -1525,9 +1172,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
stw r5,VCPU_SLB_MAX(r9)
/* load host SLB entries */
-BEGIN_MMU_FTR_SECTION
- b guest_bypass
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
@@ -1540,21 +1184,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
slbmte r6,r5
1: addi r8,r8,16
.endr
- b guest_bypass
-
-0: /*
- * Sanitise radix guest SLB, see guest_exit_short_path comment.
- * We clear vcpu->arch.slb_max to match earlier behaviour.
- */
- li r0,0
- stw r0,VCPU_SLB_MAX(r9)
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
guest_bypass:
stw r12, STACK_SLOT_TRAP(r1)
@@ -1564,12 +1193,6 @@ guest_bypass:
ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
- /* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
- ld r4, VCORE_LPCR(r3)
- andis. r4, r4, LPCR_LD@h
- bne 16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
@@ -1643,7 +1266,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_BESCR(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
mfspr r7, SPRN_CSIGR
@@ -1652,17 +1274,10 @@ BEGIN_FTR_SECTION
std r6, VCPU_ACOP(r9)
std r7, VCPU_CSIGR(r9)
std r8, VCPU_TACR(r9)
-FTR_SECTION_ELSE
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- std r5, VCPU_TID(r9)
- rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
- rotldi r6, r6, 60
- std r6, VCPU_PSSCR(r9)
- /* Restore host HFSCR value */
- ld r7, STACK_SLOT_HFSCR(r1)
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_FSCR(r1)
+ mtspr SPRN_FSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
@@ -1670,13 +1285,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
li r0, 0
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
-BEGIN_FTR_SECTION
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
ld r8, STACK_SLOT_IAMR(r1)
@@ -1733,13 +1346,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -1785,80 +1394,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWR0, r6
mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- ld r6, STACK_SLOT_DAWR1(r1)
- ld r7, STACK_SLOT_DAWRX1(r1)
- mtspr SPRN_DAWR1, r6
- mtspr SPRN_DAWRX1, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
-BEGIN_FTR_SECTION
- ld r5, STACK_SLOT_TID(r1)
- ld r6, STACK_SLOT_PSSCR(r1)
- ld r7, STACK_SLOT_PID(r1)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_PID, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
-#ifdef CONFIG_PPC_RADIX_MMU
- /*
- * Are we running hash or radix ?
- */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
- cmpwi cr2, r0, 0
- beq cr2, 2f
-
- /*
- * Radix: do eieio; tlbsync; ptesync sequence in case we
- * interrupted the guest between a tlbie and a ptesync.
- */
- eieio
- tlbsync
- ptesync
-
-BEGIN_FTR_SECTION
- /* Radix: Handle the case where the guest used an illegal PID */
- LOAD_REG_ADDR(r4, mmu_base_pid)
- lwz r3, VCPU_GUEST_PID(r9)
- lwz r5, 0(r4)
- cmpw cr0,r3,r5
- blt 2f
-
- /*
- * Illegal PID, the HW might have prefetched and cached in the TLB
- * some translations for the LPID 0 / guest PID combination which
- * Linux doesn't know about, so we need to flush that PID out of
- * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
- * the right context.
- */
- li r0,0
- mtspr SPRN_LPID,r0
- isync
-
- /* Then do a congruence class local flush */
- ld r6,VCPU_KVM(r9)
- lwz r0,KVM_TLB_SETS(r6)
- mtctr r0
- li r7,0x400 /* IS field = 0b01 */
- ptesync
- sldi r0,r3,32 /* RS has PID */
-1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
- addi r7,r7,0x1000
- bdnz 1b
- ptesync
-END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
-
-2:
-#endif /* CONFIG_PPC_RADIX_MMU */
-
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
-BEGIN_FTR_SECTION
- PPC_CP_ABORT
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
/*
* POWER7/POWER8 guest -> host partition switch code.
@@ -1895,13 +1430,11 @@ kvmhv_switch_to_host:
/* Primary thread switches back to host partition */
lwz r7,KVM_HOST_LPID(r4)
-BEGIN_FTR_SECTION
ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
mtspr SPRN_SDR1,r6 /* switch to host page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -2110,26 +1643,13 @@ kvmppc_tm_emul:
* reflect the HDSI to the guest as a DSI.
*/
kvmppc_hdsi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
-BEGIN_FTR_SECTION
- /* Look for DSISR canary. If we find it, retry instruction */
- cmpdi r6, 0x7fff
- beq 6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- cmpwi r0, 0
- bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
@@ -2197,31 +1717,15 @@ fast_interrupt_c_return:
stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont
-.Lradix_hdsi:
- std r4, VCPU_FAULT_DAR(r9)
- stw r6, VCPU_FAULT_DSISR(r9)
-.Lradix_hisi:
- mfspr r5, SPRN_ASDR
- std r5, VCPU_FAULT_GPA(r9)
- b guest_exit_cont
-
/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
* it is an HPTE not found fault for a page that we have paged out.
*/
kvmppc_hisi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
- cmpwi r0, 0
- bne .Lradix_hisi /* for radix, just save ASDR */
andis. r0, r11, SRR1_ISI_NOPT@h
beq 1f
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_INST_SEGMENT
@@ -2269,10 +1773,6 @@ hcall_try_real_mode:
andi. r0,r11,MSR_PR
/* sc 1 from userspace - reflect to guest syscall */
bne sc_1_fast_return
- /* sc 1 from nested guest - give it to L1 to handle */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
@@ -2537,7 +2037,7 @@ hcall_real_table:
#else
.long 0 /* 0x2fc - H_XIRR_X*/
#endif
- .long DOTSYM(kvmppc_h_random) - hcall_real_table
+ .long DOTSYM(kvmppc_rm_h_random) - hcall_real_table
.globl hcall_real_table_end
hcall_real_table_end:
@@ -2668,13 +2168,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2694,15 +2190,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
-BEGIN_FTR_SECTION
- /* On P9 check whether the guest has large decrementer mode enabled */
- ld r6, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_LPCR(r6)
- andis. r6, r6, LPCR_LD@h
- bne 68f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
-68: EXTEND_HDEC(r4)
+ extsw r4, r4
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
@@ -2747,28 +2236,11 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvm_nap_sequence: /* desired LPCR value in r5 */
-BEGIN_FTR_SECTION
- /*
- * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
- * enable state loss = 1 (allow SMT mode switch)
- * requested level = 0 (just stop dispatching)
- */
- lis r3, (PSSCR_EC | PSSCR_ESL)@h
- /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
- li r4, LPCR_PECE_HVEE@higher
- sldi r4, r4, 32
- or r5, r5, r4
-FTR_SECTION_ELSE
li r3, PNV_THREAD_NAP
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
-BEGIN_FTR_SECTION
- bl isa300_idle_stop_mayloss
-FTR_SECTION_ELSE
bl isa206_idle_insn_mayloss
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
@@ -2787,10 +2259,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
beq kvm_end_cede
cmpwi r0, NAPPING_NOVCPU
beq kvm_novcpu_wakeup
-BEGIN_FTR_SECTION
cmpwi r0, NAPPING_UNSPLIT
beq kvm_unsplit_wakeup
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
@@ -2810,13 +2280,9 @@ kvm_end_cede:
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2906,47 +2372,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
-#ifdef CONFIG_KVM_XICS
- /* are we using XIVE with single escalation? */
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- li r6, XIVE_ESB_SET_PQ_00
- /*
- * If we still have a pending escalation, abort the cede,
- * and we must set PQ to 10 rather than 00 so that we don't
- * potentially end up with two entries for the escalation
- * interrupt in the XIVE interrupt queue. In that case
- * we also don't want to set xive_esc_on to 1 here in
- * case we race with xive_esc_irq().
- */
- lbz r5, VCPU_XIVE_ESC_ON(r9)
- cmpwi r5, 0
- beq 4f
- li r0, 0
- stb r0, VCPU_CEDED(r9)
- /*
- * The escalation interrupts are special as we don't EOI them.
- * There is no need to use the load-after-store ordering offset
- * to set PQ to 10 as we won't use StoreEOI.
- */
- li r6, XIVE_ESB_SET_PQ_10
- b 5f
-4: li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
- /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
- sync
-5: /* Enable XIVE escalation */
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 1f
- ldx r0, r10, r6
- b 2f
-1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- ldcix r0, r10, r6
-2: sync
-#endif /* CONFIG_KVM_XICS */
-3: b guest_exit_cont
+ b guest_exit_cont
/* Try to do machine check recovery in real mode */
machine_check_realmode:
@@ -3023,10 +2449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
PPC_MSGCLR(6)
/* see if it's a host IPI */
li r3, 1
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bnelr
@@ -3335,73 +2757,12 @@ kvmppc_bad_host_intr:
std r3, STACK_FRAME_OVERHEAD-16(r1)
/*
- * On POWER9 do a minimal restore of the MMU and call C code,
- * which will print a message and panic.
* XXX On POWER7 and POWER8, we just spin here since we don't
* know what the other threads are doing (and we don't want to
* coordinate with them) - but at least we now have register state
* in memory that we might be able to look at from another CPU.
*/
-BEGIN_FTR_SECTION
b .
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
- ld r9, HSTATE_KVM_VCPU(r13)
- ld r10, VCPU_KVM(r9)
-
- li r0, 0
- mtspr SPRN_AMR, r0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX0, r0
-BEGIN_FTR_SECTION
- mtspr SPRN_DAWRX1, r0
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
-
- /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
- slbmte r0, r0
- PPC_SLBIA(6)
-
-BEGIN_MMU_FTR_SECTION
- b 4f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
-
- ptesync
- ld r8, PACA_SLBSHADOWPTR(r13)
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7, r5, SLB_ESID_V@h
- beq 3f
- slbmte r6, r5
-3: addi r8, r8, 16
- .endr
-
-4: lwz r7, KVM_HOST_LPID(r10)
- mtspr SPRN_LPID, r7
- mtspr SPRN_PID, r0
- ld r8, KVM_HOST_LPCR(r10)
- mtspr SPRN_LPCR, r8
- isync
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- /*
- * Turn on the MMU and jump to C code
- */
- bcl 20, 31, .+4
-5: mflr r3
- addi r3, r3, 9f - 5b
- li r4, -1
- rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */
- ld r4, PACAKMSR(r13)
- mtspr SPRN_SRR0, r3
- mtspr SPRN_SRR1, r4
- RFI_TO_KERNEL
-9: addi r3, r1, STACK_FRAME_OVERHEAD
- bl kvmppc_bad_interrupt
- b 9b
/*
* This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 84e5a2dc8be5..a7061ee3b157 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -90,6 +90,7 @@
#include <linux/migrate.h>
#include <linux/kvm_host.h>
#include <linux/ksm.h>
+#include <linux/of.h>
#include <asm/ultravisor.h>
#include <asm/mman.h>
#include <asm/kvm_ppc.h>
@@ -614,7 +615,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
/* Fetch the VMA if addr is not in the latest fetched one */
if (!vma || addr >= vma->vm_end) {
- vma = find_vma_intersection(kvm->mm, addr, addr+1);
+ vma = vma_lookup(kvm->mm, addr);
if (!vma) {
pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
break;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d7733b07f489..71bcb0140461 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -493,7 +493,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_block(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
- vcpu->stat.halt_wakeup++;
+ vcpu->stat.generic.halt_wakeup++;
/* Unset POW bit after we woke up */
msr &= ~MSR_POW;
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 031c8015864a..ac14239f3424 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -378,7 +378,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
- vcpu->stat.halt_wakeup++;
+ vcpu->stat.generic.halt_wakeup++;
return EMULATE_DONE;
case H_LOGICAL_CI_LOAD:
return kvmppc_h_pr_logical_ci_load(vcpu);
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1f492aa4c8d6..202046a83fc1 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -164,12 +164,15 @@ kvmppc_interrupt_pr:
/* 64-bit entry. Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
+ * R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | exit handler id
* R13 = PACA
* HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH2 = guest R9
*/
#ifdef CONFIG_PPC64
/* Match 32-bit entry */
+ ld r9,HSTATE_SCRATCH2(r13)
rotldi r12, r12, 32 /* Flip R12 halves for stw */
stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
srdi r12, r12, 32 /* shift trap into low half */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e7219b6f5f9a..8cfab3547494 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/uaccess.h>
+#include <linux/irqdomain.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
@@ -128,6 +129,71 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+ if (!vcpu->arch.xive_pushed)
+ return;
+
+ /*
+ * Should not have been pushed if there is no tima
+ */
+ if (WARN_ON(!tima))
+ return;
+
+ eieio();
+ /* First load to pull the context, we ignore the value */
+ __raw_readl(tima + TM_SPC_PULL_OS_CTX);
+ /* Second load to recover the context state (Words 0 and 1) */
+ vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+ /* Fixup some of the state for the next load */
+ vcpu->arch.xive_saved_state.lsmfb = 0;
+ vcpu->arch.xive_saved_state.ack = 0xff;
+ vcpu->arch.xive_pushed = 0;
+ eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
+void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+ void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+ if (!esc_vaddr)
+ return;
+
+ /* we are using XIVE with single escalation */
+
+ if (vcpu->arch.xive_esc_on) {
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
+ vcpu->arch.ceded = 0;
+ /*
+ * The escalation interrupts are special as we don't EOI them.
+ * There is no need to use the load-after-store ordering offset
+ * to set PQ to 10 as we won't use StoreEOI.
+ */
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+ } else {
+ vcpu->arch.xive_esc_on = true;
+ mb();
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+ }
+ mb();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
+/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
*/
@@ -2075,6 +2141,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ /* The VM should have configured XICS mode before doing XICS hcalls. */
+ if (!kvmppc_xics_enabled(vcpu))
+ return H_TOO_HARD;
+
+ switch (req) {
+ case H_XIRR:
+ return xive_vm_h_xirr(vcpu);
+ case H_CPPR:
+ return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_EOI:
+ return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_IPI:
+ return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ case H_IPOLL:
+ return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_XIRR_X:
+ xive_vm_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
+ return H_SUCCESS;
+ }
+
+ return H_UNSUPPORTED;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
@@ -2257,21 +2353,3 @@ struct kvm_device_ops kvm_xive_ops = {
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
};
-
-void kvmppc_xive_init_module(void)
-{
- __xive_vm_h_xirr = xive_vm_h_xirr;
- __xive_vm_h_ipoll = xive_vm_h_ipoll;
- __xive_vm_h_ipi = xive_vm_h_ipi;
- __xive_vm_h_cppr = xive_vm_h_cppr;
- __xive_vm_h_eoi = xive_vm_h_eoi;
-}
-
-void kvmppc_xive_exit_module(void)
-{
- __xive_vm_h_xirr = NULL;
- __xive_vm_h_ipoll = NULL;
- __xive_vm_h_ipi = NULL;
- __xive_vm_h_cppr = NULL;
- __xive_vm_h_eoi = NULL;
-}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 86c24a4ad809..afe9eeac6d56 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -289,13 +289,6 @@ extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
-extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
- unsigned long mfrr);
-extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-
/*
* Common Xive routines for XICS-over-XIVE and XIVE native
*/
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 76800c84f2a3..573ecaab3597 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -12,6 +12,7 @@
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
+#include <linux/irqdomain.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
@@ -1281,13 +1282,3 @@ struct kvm_device_ops kvm_xive_native_ops = {
.has_attr = kvmppc_xive_native_has_attr,
.mmap = kvmppc_xive_native_mmap,
};
-
-void kvmppc_xive_native_init_module(void)
-{
- ;
-}
-
-void kvmppc_xive_native_exit_module(void)
-{
- ;
-}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7d5fe43f85c4..551b30d84aee 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,29 +36,59 @@
unsigned long kvmppc_booke_handlers;
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- VCPU_STAT("mmio", mmio_exits),
- VCPU_STAT("sig", signal_exits),
- VCPU_STAT("itlb_r", itlb_real_miss_exits),
- VCPU_STAT("itlb_v", itlb_virt_miss_exits),
- VCPU_STAT("dtlb_r", dtlb_real_miss_exits),
- VCPU_STAT("dtlb_v", dtlb_virt_miss_exits),
- VCPU_STAT("sysc", syscall_exits),
- VCPU_STAT("isi", isi_exits),
- VCPU_STAT("dsi", dsi_exits),
- VCPU_STAT("inst_emu", emulated_inst_exits),
- VCPU_STAT("dec", dec_exits),
- VCPU_STAT("ext_intr", ext_intr_exits),
- VCPU_STAT("halt_successful_poll", halt_successful_poll),
- VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
- VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
- VCPU_STAT("halt_wakeup", halt_wakeup),
- VCPU_STAT("doorbell", dbell_exits),
- VCPU_STAT("guest doorbell", gdbell_exits),
- VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
- VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
- VM_STAT("remote_tlb_flush", remote_tlb_flush),
- { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_ICOUNTER(VM, num_2M_pages),
+ STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+ sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, sum_exits),
+ STATS_DESC_COUNTER(VCPU, mmio_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, light_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, syscall_exits),
+ STATS_DESC_COUNTER(VCPU, isi_exits),
+ STATS_DESC_COUNTER(VCPU, dsi_exits),
+ STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+ STATS_DESC_COUNTER(VCPU, dec_exits),
+ STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+ STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
+ STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+ STATS_DESC_COUNTER(VCPU, dbell_exits),
+ STATS_DESC_COUNTER(VCPU, gdbell_exits),
+ STATS_DESC_COUNTER(VCPU, ld),
+ STATS_DESC_COUNTER(VCPU, st),
+ STATS_DESC_COUNTER(VCPU, pthru_all),
+ STATS_DESC_COUNTER(VCPU, pthru_host),
+ STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+ sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
};
/* TODO: use vcpu_printf() */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a2a68a958fa0..be33b5321a76 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -682,6 +682,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
!kvmppc_hv_ops->enable_dawr1(NULL));
break;
+ case KVM_CAP_PPC_RPT_INVALIDATE:
+ r = 1;
+ break;
#endif
default:
r = 0;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 1fd31b4b0e13..fe26f2fa0f3f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
#include <asm/page.h>
@@ -149,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
- if (types & STF_BARRIER_FALLBACK)
+ // See comment in do_entry_flush_fixups() RE order of patching
+ if (types & STF_BARRIER_FALLBACK) {
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
patch_branch((struct ppc_inst *)(dest + 1),
- (unsigned long)&stf_barrier_fallback,
- BRANCH_SET_LINK);
- else
- patch_instruction((struct ppc_inst *)(dest + 1),
- ppc_inst(instrs[1]));
-
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
+ } else {
+ patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ }
}
printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
: "unknown");
}
+static int __do_stf_barrier_fixups(void *data)
+{
+ enum stf_barrier_type *types = data;
+
+ do_stf_entry_barrier_fixups(*types);
+ do_stf_exit_barrier_fixups(*types);
+
+ return 0;
+}
void do_stf_barrier_fixups(enum stf_barrier_type types)
{
- do_stf_entry_barrier_fixups(types);
- do_stf_exit_barrier_fixups(types);
+ /*
+ * The call to the fallback entry flush, and the fallback/sync-ori exit
+ * flush can not be safely patched in/out while other CPUs are executing
+ * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+ * spin in the stop machine core with interrupts hard disabled.
+ */
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
}
void do_uaccess_flush_fixups(enum l1d_flush_type types)
@@ -284,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
: "unknown");
}
-void do_entry_flush_fixups(enum l1d_flush_type types)
+static int __do_entry_flush_fixups(void *data)
{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
unsigned int instrs[3], *dest;
long *start, *end;
int i;
@@ -309,6 +325,31 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
+ /*
+ * If we're patching in or out the fallback flush we need to be careful about the
+ * order in which we patch instructions. That's because it's possible we could
+ * take a page fault after patching one instruction, so the sequence of
+ * instructions must be safe even in a half patched state.
+ *
+ * To make that work, when patching in the fallback flush we patch in this order:
+ * - the mflr (dest)
+ * - the mtlr (dest + 2)
+ * - the branch (dest + 1)
+ *
+ * That ensures the sequence is safe to execute at any point. In contrast if we
+ * patch the mtlr last, it's possible we could return from the branch and not
+ * restore LR, leading to a crash later.
+ *
+ * When patching out the fallback flush (either with nops or another flush type),
+ * we patch in this order:
+ * - the branch (dest + 1)
+ * - the mtlr (dest + 2)
+ * - the mflr (dest)
+ *
+ * Note we are protected by stop_machine() from other CPUs executing the code in a
+ * semi-patched state.
+ */
+
start = PTRRELOC(&__start___entry_flush_fixup);
end = PTRRELOC(&__stop___entry_flush_fixup);
for (i = 0; start < end; start++, i++) {
@@ -316,15 +357,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
- if (types == L1D_FLUSH_FALLBACK)
- patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
- BRANCH_SET_LINK);
- else
+ if (types == L1D_FLUSH_FALLBACK) {
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_branch((struct ppc_inst *)(dest + 1),
+ (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
+ } else {
patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ }
}
start = PTRRELOC(&__start___scv_entry_flush_fixup);
@@ -334,15 +376,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
pr_devel("patching dest %lx\n", (unsigned long)dest);
- patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
- if (types == L1D_FLUSH_FALLBACK)
- patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback,
- BRANCH_SET_LINK);
- else
+ if (types == L1D_FLUSH_FALLBACK) {
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_branch((struct ppc_inst *)(dest + 1),
+ (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
+ } else {
patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
- patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+ patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+ }
}
@@ -354,6 +397,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
: "ori type" :
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
+
+ return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * The call to the fallback flush can not be safely patched in/out while
+ * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+ * CPU while all other CPUs spin in the stop machine core with interrupts
+ * hard disabled.
+ */
+ stop_machine(__do_entry_flush_fixups, &types, NULL);
}
void do_rfi_flush_fixups(enum l1d_flush_type types)
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index c3df3a8501d4..2ffcf540f08b 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -13,7 +13,7 @@ obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \
obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
-obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b463..2176a5f70746 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include <linux/memblock.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
@@ -357,30 +358,19 @@ static void __init radix_init_pgtable(void)
}
/* Find out how many PID bits are supported */
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
- mmu_base_pid = 1;
- } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
- * When KVM is possible, we only use the top half of the
- * PID space to avoid collisions between host and guest PIDs
- * which can cause problems due to prefetch when exiting the
- * guest with AIL=3
+ * Older versions of KVM on these machines perfer if the
+ * guest only uses the low 19 PID bits.
*/
- mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
- mmu_base_pid = 1;
-#endif
- } else {
- /* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
- mmu_base_pid = 1;
+ } else {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
}
+ mmu_base_pid = 1;
/*
* Allocate Partition table and process table for the
@@ -486,6 +476,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
def = &mmu_psize_defs[idx];
def->shift = shift;
def->ap = ap;
+ def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
}
/* needed ? */
@@ -560,9 +551,13 @@ void __init radix__early_init_devtree(void)
*/
mmu_psize_defs[MMU_PAGE_4K].shift = 12;
mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+ mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_4K);
mmu_psize_defs[MMU_PAGE_64K].shift = 16;
mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+ mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
+ psize_to_rpti_pgsize(MMU_PAGE_64K);
}
/*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e61210789..318ec4f33661 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -20,10 +20,6 @@
#include "internal.h"
-#define RIC_FLUSH_TLB 0
-#define RIC_FLUSH_PWC 1
-#define RIC_FLUSH_ALL 2
-
/*
* tlbiel instruction for radix, set invalidation
* i.e., r=1 and is=01 or is=10 or is=11
@@ -130,6 +126,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+ unsigned long lpid,
+ unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
unsigned long rb,rs,prs,r;
@@ -190,6 +201,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap, unsigned long ric)
{
@@ -235,6 +263,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
}
}
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+ unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
static inline void fixup_tlbie_pid(unsigned long pid)
{
/*
@@ -254,6 +298,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
}
}
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+ RIC_FLUSH_TLB);
+ }
+}
static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
unsigned long ap)
@@ -344,6 +407,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+ unsigned long ric)
+{
+ asm volatile("ptesync" : : : "memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time contraint to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
struct tlbiel_pid {
unsigned long pid;
unsigned long ric;
@@ -469,6 +557,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
fixup_tlbie_va_range(addr - page_size, pid, ap);
}
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
unsigned long psize, unsigned long ric)
{
@@ -549,6 +651,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync" : : : "memory");
+ if (also_pwc)
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
unsigned long start, unsigned long end,
unsigned long pid, unsigned long page_size,
@@ -1338,47 +1452,57 @@ void radix__flush_tlb_all(void)
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
{
- unsigned long pid = mm->context.id;
+ unsigned long psize, nr_pages;
+ struct mmu_psize_def *def;
+ bool flush_pid;
- if (unlikely(pid == MMU_NO_CONTEXT))
+ /*
+ * A H_RPTI_TYPE_ALL request implies RIC=3, hence
+ * do a single IS=1 based flush.
+ */
+ if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
return;
+ }
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- return;
+ if (type & H_RPTI_TYPE_PWC)
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
- /*
- * If this context hasn't run on that CPU before and KVM is
- * around, there's a slim chance that the guest on another
- * CPU just brought in obsolete translation into the TLB of
- * this CPU due to a bad prefetch using the guest PID on
- * the way into the hypervisor.
- *
- * We work around this here. If KVM is possible, we check if
- * any sibling thread is in KVM. If it is, the window may exist
- * and thus we flush that PID from the core.
- *
- * A potential future improvement would be to mark which PIDs
- * have never been used on the system and avoid it if the PID
- * is new and the process has no other cpumask bit set.
- */
- if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
- int cpu = smp_processor_id();
- int sib = cpu_first_thread_sibling(cpu);
- bool flush = false;
-
- for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
- if (sib == cpu)
- continue;
- if (!cpu_possible(sib))
- continue;
- if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
- flush = true;
+ /* Full PID flush */
+ if (start == 0 && end == -1)
+ return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+ /* Do range invalidation for all the valid page sizes */
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+ /*
+ * If the number of pages spanning the range is above
+ * the ceiling, convert the request into a full PID flush.
+ * And since PID flush takes out all the page sizes, there
+ * is no need to consider remaining page sizes.
+ */
+ if (flush_pid) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ return;
}
- if (flush)
- _tlbiel_pid(pid, RIC_FLUSH_ALL);
+ _tlbie_va_range_lpid(start, end, pid, lpid,
+ (1UL << def->shift), psize, false);
}
}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 043bbeaf407c..c5e520c6f13b 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -20,6 +20,7 @@
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/kasan.h>
+#include <asm/sparsemem.h>
#include <asm/svm.h>
#include <mm/mmu_decl.h>
@@ -126,7 +127,7 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
}
#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
void __init mem_topology_setup(void)
{
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -161,7 +162,7 @@ static int __init mark_nonram_nosave(void)
return 0;
}
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
static int __init mark_nonram_nosave(void)
{
return 0;
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index a857af401738..74246536b832 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
- if (new_on_cpu)
- radix_kvm_prefetch_workaround(next);
- else
+ if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
/*
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 16d4d1b6a1ff..51622411a7cc 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs)
bool use_siar = regs_use_siar(regs);
unsigned long siar = mfspr(SPRN_SIAR);
- if (ppmu->flags & PPMU_P10_DD1) {
+ if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
if (siar)
return siar;
else
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index e7c976bcadff..cb70c5f25bc6 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,7 @@ config PPC_IBM_CELL_BLADE
config AXON_MSI
bool
depends on PPC_IBM_CELL_BLADE && PCI_MSI
+ select IRQ_DOMAIN_NOMAP
default y
menu "Cell Broadband Engine options"
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
index 35bbd15582af..b207a7f99be5 100644
--- a/arch/powerpc/platforms/cell/pmu.c
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -10,6 +10,7 @@
*/
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/types.h>
#include <linux/export.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index d39a9213a3e6..609bda2ad5dd 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <asm/io.h>
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index c02d8c503b29..b97bf12801eb 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -24,6 +24,7 @@ config PPC_PMAC32_PSURGE
bool "Support for powersurge upgrade cards" if EXPERT
depends on SMP && PPC32 && PPC_PMAC
select PPC_SMP_MUXED_IPI
+ select IRQ_DOMAIN_NOMAP
default y
help
The powersurge cpu boards can be used in the generation
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 999997d9e9a9..528a7e0cf83a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -604,7 +604,7 @@ struct p9_sprs {
u64 uamor;
};
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power9_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -803,8 +801,7 @@ core_woken:
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -895,7 +892,7 @@ struct p10_sprs {
*/
};
-static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power10_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -991,8 +986,7 @@ core_woken:
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
{
unsigned long srr1;
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- __ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-#else
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- *
- * kvm_start_guest must still be called in real mode though, hence
- * the false argument.
- */
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
- __ppc64_runlatch_off();
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, false);
- else
- srr1 = power9_idle_stop(psscr, false);
- __ppc64_runlatch_on();
-
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
- /* Order setting hwthread_state vs. testing hwthread_req */
- smp_mb();
- if (local_paca->kvm_hstate.hwthread_req)
- srr1 = idle_kvm_start_guest(srr1);
- mtmsr(MSR_KERNEL);
-#endif
+ srr1 = power9_idle_stop(psscr);
return srr1;
}
@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
+ srr1 = power9_idle_stop(psscr);
__ppc64_runlatch_on();
fini_irq_for_idle_irqsoff();
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index e32406e918d0..4d0535cc7946 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -7,6 +7,7 @@ config PPC_PS3
select USB_OHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_MMIO
select HAVE_PCI
+ select IRQ_DOMAIN_NOMAP
help
This option enables support for the Sony PS3 game console
and other platforms using the PS3 hypervisor. Enabling this
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 78f2339ed5cb..49871427f599 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
@@ -45,7 +46,7 @@
* implementation equates HV plug value to Linux virq value, constrains each
* interrupt to have a system wide unique plug number, and limits the range
* of the plug values to map into the first dword of the bitmaps. This
- * gives a usable range of plug values of {NUM_ISA_INTERRUPTS..63}. Note
+ * gives a usable range of plug values of {NR_IRQS_LEGACY..63}. Note
* that there is no constraint on how many in this set an individual thread
* can acquire.
*
@@ -721,7 +722,7 @@ static unsigned int ps3_get_irq(void)
}
#if defined(DEBUG)
- if (unlikely(plug < NUM_ISA_INTERRUPTS || plug > PS3_PLUG_MAX)) {
+ if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) {
dump_bmp(&per_cpu(ps3_private, 0));
dump_bmp(&per_cpu(ps3_private, 1));
BUG();
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 2136e42833af..8a2b8d64265b 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1); \
#define HCALL_BRANCH(LABEL)
#endif
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+ HMT_MEDIUM
+
+ mfcr r0
+ stw r0,8(r1)
+ HVSC /* invoke the hypervisor */
+ lwz r0,8(r1)
+ mtcrf 0xff,r0
+ blr /* return r3 = status */
+
_GLOBAL_TOC(plpar_hcall_norets)
HMT_MEDIUM
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index a15ab33646b3..c6c79ef55e13 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -42,6 +42,7 @@
#include <linux/kobject.h>
#include <linux/dma-map-ops.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/stat.h>
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 1f3152ad7213..dab356e3ff87 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1829,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void)
#endif
/*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
*/
static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
-void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
unsigned long flags;
unsigned int *depth;
- /*
- * We cannot call tracepoints inside RCU idle regions which
- * means we must not trace H_CEDE.
- */
- if (opcode == H_CEDE)
- return;
-
local_irq_save(flags);
depth = this_cpu_ptr(&hcall_trace_depth);
- if (*depth)
+ if (WARN_ON_ONCE(*depth))
goto out;
(*depth)++;
@@ -1864,19 +1862,16 @@ out:
local_irq_restore(flags);
}
-void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
{
unsigned long flags;
unsigned int *depth;
- if (opcode == H_CEDE)
- return;
-
local_irq_save(flags);
depth = this_cpu_ptr(&hcall_trace_depth);
- if (*depth)
+ if (*depth) /* Don't warn again on the way out */
goto out;
(*depth)++;
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index 48866e6c1efb..00705258ecf9 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irq.h>
+#include <linux/irqdomain.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index 13583bbc3e8e..5fa5fa215541 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <asm/io.h>
#include <asm/irq.h>
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index c1d76c344351..dc1a151c63d7 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -260,7 +260,8 @@ void i8259_init(struct device_node *node, unsigned long intack_addr)
raw_spin_unlock_irqrestore(&i8259_lock, flags);
/* create a legacy host */
- i8259_host = irq_domain_add_legacy_isa(node, &i8259_host_ops, NULL);
+ i8259_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+ &i8259_host_ops, NULL);
if (i8259_host == NULL) {
printk(KERN_ERR "i8259: failed to allocate irq host !\n");
return;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b0426f28946a..995fb2ada507 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -602,7 +602,7 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
/* Find an mpic associated with a given linux interrupt */
static struct mpic *mpic_find(unsigned int irq)
{
- if (irq < NUM_ISA_INTERRUPTS)
+ if (irq < NR_IRQS_LEGACY)
return NULL;
return irq_get_chip_data(irq);
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 49f9541954f8..042bb38fa5c2 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -404,7 +404,8 @@ void __init tsi108_pci_int_init(struct device_node *node)
{
DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
- pci_irq_host = irq_domain_add_legacy_isa(node, &pci_irq_domain_ops, NULL);
+ pci_irq_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+ &pci_irq_domain_ops, NULL);
if (pci_irq_host == NULL) {
printk(KERN_ERR "pci_irq_host: failed to allocate irq domain!\n");
return;
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 21b9d1bf39ff..6765d9e264a3 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -7,6 +7,7 @@
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/cpu.h>
#include <linux/of.h>
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 68fd2540b093..675d708863d5 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -7,6 +7,7 @@
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/cpu.h>
#include <linux/of.h>
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 7e4305c01bac..fdf8db4444b6 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -201,7 +201,7 @@ void xics_migrate_irqs_away(void)
struct ics *ics;
/* We can't set affinity on ISA interrupts */
- if (virq < NUM_ISA_INTERRUPTS)
+ if (virq < NR_IRQS_LEGACY)
continue;
/* We only need to migrate enabled IRQS */
if (!desc->action)
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 785c292d104b..97796c6b63f0 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -3,6 +3,7 @@ config PPC_XIVE
bool
select PPC_SMP_MUXED_IPI
select HARDIRQS_SW_RESEND
+ select IRQ_DOMAIN_NOMAP
config PPC_XIVE_NATIVE
bool
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index c8173e92f19d..84de2d7c2f40 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3162,6 +3162,7 @@ memzcan(void)
static void show_task(struct task_struct *tsk)
{
+ unsigned int p_state = READ_ONCE(tsk->__state);
char state;
/*
@@ -3169,14 +3170,14 @@ static void show_task(struct task_struct *tsk)
* appropriate for calling from xmon. This could be moved
* to a common, generic, routine used by both.
*/
- state = (tsk->state == 0) ? 'R' :
- (tsk->state < 0) ? 'U' :
- (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' :
- (tsk->state & TASK_STOPPED) ? 'T' :
- (tsk->state & TASK_TRACED) ? 'C' :
+ state = (p_state == 0) ? 'R' :
+ (p_state < 0) ? 'U' :
+ (p_state & TASK_UNINTERRUPTIBLE) ? 'D' :
+ (p_state & TASK_STOPPED) ? 'T' :
+ (p_state & TASK_TRACED) ? 'C' :
(tsk->exit_state & EXIT_ZOMBIE) ? 'Z' :
(tsk->exit_state & EXIT_DEAD) ? 'E' :
- (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+ (p_state & TASK_INTERRUPTIBLE) ? 'S' : '?';
printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk,
tsk->thread.ksp, tsk->thread.regs,