aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/Kconfig48
-rw-r--r--arch/powerpc/Makefile11
-rw-r--r--arch/powerpc/Makefile.postlink17
-rw-r--r--arch/powerpc/boot/Makefile5
-rw-r--r--arch/powerpc/boot/crtsavres.S8
-rw-r--r--arch/powerpc/boot/dts/ac14xx.dts2
-rw-r--r--arch/powerpc/boot/dts/digsy_mtc.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/b4qds.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/c293pcie.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/kmcent2.dts4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb.dtsi2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1023rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/fsl/p2041rdb.dts4
-rw-r--r--arch/powerpc/boot/dts/fsl/p3041ds.dts4
-rw-r--r--arch/powerpc/boot/dts/fsl/p4080ds.dts4
-rw-r--r--arch/powerpc/boot/dts/fsl/p5020ds.dts4
-rw-r--r--arch/powerpc/boot/dts/fsl/p5040ds.dts4
-rw-r--r--arch/powerpc/boot/dts/fsl/t208xqds.dtsi8
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240qds.dts12
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240rdb.dts6
-rw-r--r--arch/powerpc/boot/dts/fsp2.dts608
-rw-r--r--arch/powerpc/boot/dts/mpc5121ads.dts4
-rw-r--r--arch/powerpc/boot/dts/mpc8308_p1m.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8349emitx.dts4
-rw-r--r--arch/powerpc/boot/dts/mpc8377_rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8377_wlan.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8378_rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/mpc8379_rdb.dts2
-rw-r--r--arch/powerpc/boot/dts/pcm030.dts2
-rw-r--r--arch/powerpc/boot/dts/pcm032.dts2
-rw-r--r--arch/powerpc/boot/dts/pdm360ng.dts2
-rw-r--r--arch/powerpc/boot/dts/sequoia.dts2
-rw-r--r--arch/powerpc/boot/dts/warp.dts2
-rw-r--r--arch/powerpc/boot/ppc_asm.h12
-rw-r--r--arch/powerpc/configs/44x/fsp2_defconfig126
-rw-r--r--arch/powerpc/include/asm/barrier.h5
-rw-r--r--arch/powerpc/include/asm/bitops.h87
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgalloc.h3
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h2
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h3
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h10
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h16
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h45
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h6
-rw-r--r--arch/powerpc/include/asm/bug.h2
-rw-r--r--arch/powerpc/include/asm/code-patching.h10
-rw-r--r--arch/powerpc/include/asm/compat.h1
-rw-r--r--arch/powerpc/include/asm/cputable.h3
-rw-r--r--arch/powerpc/include/asm/dbell.h13
-rw-r--r--arch/powerpc/include/asm/delay.h16
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h5
-rw-r--r--arch/powerpc/include/asm/exception-64s.h49
-rw-r--r--arch/powerpc/include/asm/fadump.h4
-rw-r--r--arch/powerpc/include/asm/head-64.h25
-rw-r--r--arch/powerpc/include/asm/hvcall.h2
-rw-r--r--arch/powerpc/include/asm/hw_irq.h4
-rw-r--r--arch/powerpc/include/asm/iommu.h4
-rw-r--r--arch/powerpc/include/asm/kprobes.h1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h1
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h13
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/include/asm/machdep.h1
-rw-r--r--arch/powerpc/include/asm/mce.h15
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h3
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h2
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgalloc.h11
-rw-r--r--arch/powerpc/include/asm/opal-api.h76
-rw-r--r--arch/powerpc/include/asm/paca.h14
-rw-r--r--arch/powerpc/include/asm/pgalloc.h14
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h13
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h11
-rw-r--r--arch/powerpc/include/asm/processor.h61
-rw-r--r--arch/powerpc/include/asm/topology.h20
-rw-r--r--arch/powerpc/include/asm/trace.h33
-rw-r--r--arch/powerpc/include/asm/uaccess.h9
-rw-r--r--arch/powerpc/include/asm/xive.h12
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild6
-rw-r--r--arch/powerpc/include/uapi/asm/ioctls.h1
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h6
-rw-r--r--arch/powerpc/include/uapi/asm/param.h1
-rw-r--r--arch/powerpc/include/uapi/asm/poll.h1
-rw-r--r--arch/powerpc/include/uapi/asm/resource.h1
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h90
-rw-r--r--arch/powerpc/include/uapi/asm/sockios.h20
-rw-r--r--arch/powerpc/include/uapi/asm/statfs.h6
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c13
-rw-r--r--arch/powerpc/kernel/dma-iommu.c6
-rw-r--r--arch/powerpc/kernel/dma.c17
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c58
-rw-r--r--arch/powerpc/kernel/entry_64.S193
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S295
-rw-r--r--arch/powerpc/kernel/fadump.c196
-rw-r--r--arch/powerpc/kernel/idle_book3s.S188
-rw-r--r--arch/powerpc/kernel/iommu.c28
-rw-r--r--arch/powerpc/kernel/irq.c62
-rw-r--r--arch/powerpc/kernel/kprobes.c25
-rw-r--r--arch/powerpc/kernel/mce.c2
-rw-r--r--arch/powerpc/kernel/mce_power.c3
-rw-r--r--arch/powerpc/kernel/misc_32.S6
-rw-r--r--arch/powerpc/kernel/nvram_64.c14
-rw-r--r--arch/powerpc/kernel/optprobes.c53
-rw-r--r--arch/powerpc/kernel/process.c48
-rw-r--r--arch/powerpc/kernel/rtasd.c2
-rw-r--r--arch/powerpc/kernel/setup-common.c6
-rw-r--r--arch/powerpc/kernel/setup_64.c35
-rw-r--r--arch/powerpc/kernel/smp.c9
-rw-r--r--arch/powerpc/kernel/time.c96
-rw-r--r--arch/powerpc/kernel/tm.S4
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S59
-rw-r--r--arch/powerpc/kernel/traps.c3
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S61
-rw-r--r--arch/powerpc/kvm/book3s_hv.c576
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S20
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c18
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c11
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S248
-rw-r--r--arch/powerpc/kvm/book3s_xive.c4
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c4
-rw-r--r--arch/powerpc/kvm/booke.c2
-rw-r--r--arch/powerpc/kvm/emulate.c4
-rw-r--r--arch/powerpc/kvm/powerpc.c45
-rw-r--r--arch/powerpc/lib/Makefile15
-rw-r--r--arch/powerpc/lib/code-patching.c171
-rw-r--r--arch/powerpc/lib/copyuser_power7.S4
-rw-r--r--arch/powerpc/lib/crtsavres.S6
-rw-r--r--arch/powerpc/lib/xor_vmx.c53
-rw-r--r--arch/powerpc/lib/xor_vmx.h20
-rw-r--r--arch/powerpc/lib/xor_vmx_glue.c62
-rw-r--r--arch/powerpc/mm/8xx_mmu.c2
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c2
-rw-r--r--arch/powerpc/mm/dump_hashpagetable.c2
-rw-r--r--arch/powerpc/mm/fault.c17
-rw-r--r--arch/powerpc/mm/hash_native_64.c41
-rw-r--r--arch/powerpc/mm/hash_utils_64.c2
-rw-r--r--arch/powerpc/mm/hugetlbpage-radix.c2
-rw-r--r--arch/powerpc/mm/hugetlbpage.c95
-rw-r--r--arch/powerpc/mm/init_64.c82
-rw-r--r--arch/powerpc/mm/mem.c32
-rw-r--r--arch/powerpc/mm/mmap.c4
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c17
-rw-r--r--arch/powerpc/mm/mmu_decl.h1
-rw-r--r--arch/powerpc/mm/numa.c22
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c4
-rw-r--r--arch/powerpc/mm/pgtable-hash64.c115
-rw-r--r--arch/powerpc/mm/pgtable-radix.c90
-rw-r--r--arch/powerpc/mm/pgtable_32.c15
-rw-r--r--arch/powerpc/mm/pgtable_64.c45
-rw-r--r--arch/powerpc/mm/slb.c10
-rw-r--r--arch/powerpc/mm/slb_low.S30
-rw-r--r--arch/powerpc/mm/slice.c2
-rw-r--r--arch/powerpc/mm/tlb-radix.c9
-rw-r--r--arch/powerpc/mm/tlb_hash64.c6
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c3
-rw-r--r--arch/powerpc/perf/hv-24x7.c242
-rw-r--r--arch/powerpc/perf/hv-24x7.h69
-rw-r--r--arch/powerpc/perf/perf_regs.c3
-rw-r--r--arch/powerpc/perf/power9-events-list.h4
-rw-r--r--arch/powerpc/perf/power9-pmu.c12
-rw-r--r--arch/powerpc/platforms/44x/Kconfig12
-rw-r--r--arch/powerpc/platforms/44x/Makefile1
-rw-r--r--arch/powerpc/platforms/44x/fsp2.c62
-rw-r--r--arch/powerpc/platforms/Kconfig11
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype6
-rw-r--r--arch/powerpc/platforms/cell/iommu.c53
-rw-r--r--arch/powerpc/platforms/cell/smp.c3
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c2
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c16
-rw-r--r--arch/powerpc/platforms/powernv/idle.c198
-rw-r--r--arch/powerpc/platforms/powernv/npu-dma.c97
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S6
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c133
-rw-r--r--arch/powerpc/platforms/powernv/pci.c160
-rw-r--r--arch/powerpc/platforms/powernv/pci.h13
-rw-r--r--arch/powerpc/platforms/powernv/smp.c34
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c18
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c10
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig2
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c9
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c16
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c11
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c7
-rw-r--r--arch/powerpc/platforms/pseries/smp.c3
-rw-r--r--arch/powerpc/platforms/pseries/vio.c75
-rw-r--r--arch/powerpc/sysdev/axonram.c8
-rw-r--r--arch/powerpc/sysdev/mpc8xx_pic.c2
-rw-r--r--arch/powerpc/sysdev/simple_gpio.c3
-rw-r--r--arch/powerpc/sysdev/xive/common.c4
-rw-r--r--arch/powerpc/sysdev/xive/native.c4
-rw-r--r--arch/powerpc/tools/head_check.sh78
-rwxr-xr-xarch/powerpc/tools/unrel_branch_check.sh57
-rw-r--r--arch/powerpc/xmon/xmon.c15
197 files changed, 4917 insertions, 1639 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 80d882a78426..7177a3f4f418 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -109,14 +109,6 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
-config ARCH_HAS_ILOG2_U32
- bool
- default y
-
-config ARCH_HAS_ILOG2_U64
- bool
- default y if 64BIT
-
config GENERIC_HWEIGHT
bool
default y
@@ -138,6 +130,7 @@ config PPC
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
@@ -163,7 +156,7 @@ config PPC
select GENERIC_SMP_IDLE_THREAD
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
- select GENERIC_TIME_VSYSCALL_OLD
+ select GENERIC_TIME_VSYSCALL
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_KGDB
@@ -171,6 +164,8 @@ config PPC
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
+ select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S_64 && !RELOCATABLE && !HIBERNATION)
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select HAVE_CBPF_JIT if !PPC64
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_DEBUG_KMEMLEAK
@@ -184,7 +179,7 @@ config PPC
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
- select HAVE_GENERIC_RCU_GUP
+ select HAVE_GENERIC_GUP
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
@@ -208,6 +203,7 @@ config PPC
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
+ select HAVE_IRQ_TIME_ACCOUNTING
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_RELA
@@ -380,22 +376,6 @@ source "arch/powerpc/platforms/Kconfig"
menu "Kernel options"
-config PPC_DT_CPU_FTRS
- bool "Device-tree based CPU feature discovery & setup"
- depends on PPC_BOOK3S_64
- default n
- help
- This enables code to use a new device tree binding for describing CPU
- compatibility and features. Saying Y here will attempt to use the new
- binding if the firmware provides it. Currently only the skiboot
- firmware provides this binding.
- If you're not sure say Y.
-
-config PPC_CPUFEATURES_ENABLE_UNKNOWN
- bool "cpufeatures pass through unknown features to guest/userspace"
- depends on PPC_DT_CPU_FTRS
- default y
-
config HIGHMEM
bool "High memory support"
depends on PPC32
@@ -454,6 +434,17 @@ config PPC_TRANSACTIONAL_MEM
---help---
Support user-mode Transactional Memory on POWERPC.
+config LD_HEAD_STUB_CATCH
+ bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
+ depends on PPC64
+ default n
+ help
+ Very large kernels can cause linker branch stubs to be generated by
+ code in head_64.S, which moves the head text sections out of their
+ specified location. This option can work around the problem.
+
+ If unsure, say "N".
+
config DISABLE_MPROFILE_KERNEL
bool "Disable use of mprofile-kernel for kernel tracing"
depends on PPC64 && CPU_LITTLE_ENDIAN
@@ -1207,11 +1198,6 @@ source "arch/powerpc/Kconfig.debug"
source "security/Kconfig"
-config KEYS_COMPAT
- bool
- depends on COMPAT && KEYS
- default y
-
source "crypto/Kconfig"
config PPC_LIB_RHEAP
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 3e0f0e1fadef..8d4ed73d5490 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -98,6 +98,7 @@ endif
LDFLAGS_vmlinux-y := -Bstatic
LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y)
+LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn)
ifeq ($(CONFIG_PPC64),y)
ifeq ($(call cc-option-yn,-mcmodel=medium),y)
@@ -189,7 +190,17 @@ else
CHECKFLAGS += -D__LITTLE_ENDIAN__
endif
+ifdef CONFIG_PPC32
KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+else
+ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
+# Have the linker provide sfpr if possible.
+# There is a corresponding test in arch/powerpc/lib/Makefile
+KBUILD_LDFLAGS_MODULE += --save-restore-funcs
+else
+KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+endif
+endif
ifeq ($(CONFIG_476FPE_ERR46),y)
KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
index eccfcc88afae..5db43ebbe2df 100644
--- a/arch/powerpc/Makefile.postlink
+++ b/arch/powerpc/Makefile.postlink
@@ -10,13 +10,26 @@ __archpost:
-include include/config/auto.conf
include scripts/Kbuild.include
+quiet_cmd_head_check = CHKHEAD $@
+ cmd_head_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/head_check.sh "$(NM)" "$@"
+
quiet_cmd_relocs_check = CHKREL $@
- cmd_relocs_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@"
+ifdef CONFIG_PPC_BOOK3S_64
+ cmd_relocs_check = \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@" ; \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$@"
+else
+ cmd_relocs_check = \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$@"
+endif
# `@true` prevents complaint when there is nothing to be done
vmlinux: FORCE
@true
+ifdef CONFIG_PPC64
+ $(call cmd,head_check)
+endif
ifdef CONFIG_RELOCATABLE
$(call if_changed,relocs_check)
endif
@@ -25,7 +38,7 @@ endif
@true
clean:
- @true
+ rm -f .tmp_symbols.txt
PHONY += FORCE clean
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index e82f333cc84a..a7814a7b1523 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -95,13 +95,16 @@ libfdtheader := fdt.h libfdt.h libfdt_internal.h
$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
$(addprefix $(obj)/,$(libfdtheader))
-src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \
+src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
elf_util.c $(zlib-y) devtree.c stdlib.c \
oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
uartlite.c mpc52xx-psc.c opal.c
src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S
+ifndef CONFIG_PPC64_BOOT_WRAPPER
+src-wlib-y += crtsavres.S
+endif
src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c
diff --git a/arch/powerpc/boot/crtsavres.S b/arch/powerpc/boot/crtsavres.S
index f3d9b35c07d4..085fb2b9a8b8 100644
--- a/arch/powerpc/boot/crtsavres.S
+++ b/arch/powerpc/boot/crtsavres.S
@@ -37,12 +37,13 @@
* the executable file might be covered by the GNU General Public License.
*/
+#ifdef __powerpc64__
+#error "On PPC64, FPR save/restore functions are provided by the linker."
+#endif
+
.file "crtsavres.S"
.section ".text"
-/* On PowerPC64 Linux, these functions are provided by the linker. */
-#ifndef __powerpc64__
-
#define _GLOBAL(name) \
.type name,@function; \
.globl name; \
@@ -230,4 +231,3 @@ _GLOBAL(_rest32gpr_31_x)
mtlr 0
mr 1,11
blr
-#endif
diff --git a/arch/powerpc/boot/dts/ac14xx.dts b/arch/powerpc/boot/dts/ac14xx.dts
index 27fcabc2f857..83bcfd865167 100644
--- a/arch/powerpc/boot/dts/ac14xx.dts
+++ b/arch/powerpc/boot/dts/ac14xx.dts
@@ -10,7 +10,7 @@
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "ac14xx";
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index 955bff629df3..c280e75c86bf 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -73,7 +73,7 @@
i2c@3d00 {
eeprom@50 {
- compatible = "at,24c08";
+ compatible = "atmel,24c08";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/fsl/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
index 3785ef826d07..999efd3bc167 100644
--- a/arch/powerpc/boot/dts/fsl/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
@@ -166,19 +166,19 @@
reg = <0>;
eeprom@50 {
- compatible = "at24,24c64";
+ compatible = "atmel,24c64";
reg = <0x50>;
};
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@53 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x53>;
};
eeprom@57 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x57>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsl/c293pcie.dts b/arch/powerpc/boot/dts/fsl/c293pcie.dts
index 66709788429d..5e905e0857cf 100644
--- a/arch/powerpc/boot/dts/fsl/c293pcie.dts
+++ b/arch/powerpc/boot/dts/fsl/c293pcie.dts
@@ -153,7 +153,7 @@
&soc {
i2c@3000 {
eeprom@50 {
- compatible = "st,24c1024";
+ compatible = "st,24c1024", "atmel,24c1024";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts
index 47afa438602e..5922c1ea0e96 100644
--- a/arch/powerpc/boot/dts/fsl/kmcent2.dts
+++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts
@@ -293,9 +293,7 @@
compatible = "fsl,ucc-hdlc";
rx-clock-name = "clk9";
tx-clock-name = "clk9";
- fsl,tx-timeslot-mask = <0xfffffffe>;
- fsl,rx-timeslot-mask = <0xfffffffe>;
- fsl,siram-entry-id = <0>;
+ fsl,hdlc-bus;
};
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
index a8e4ba070104..2ca9cee2ddeb 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
@@ -89,7 +89,7 @@
&board_soc {
i2c@3000 {
eeprom@50 {
- compatible = "st,24c256";
+ compatible = "st,24c256", "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/fsl/p1023rdb.dts b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
index 9716ca64651c..ead928364beb 100644
--- a/arch/powerpc/boot/dts/fsl/p1023rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
@@ -79,7 +79,7 @@
i2c@3000 {
eeprom@53 {
- compatible = "at24,24c04";
+ compatible = "atmel,24c04";
reg = <0x53>;
};
diff --git a/arch/powerpc/boot/dts/fsl/p2041rdb.dts b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
index e50fea95a853..950816b9d6e1 100644
--- a/arch/powerpc/boot/dts/fsl/p2041rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
@@ -127,7 +127,7 @@
reg = <0x48>;
};
eeprom@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
rtc@68 {
@@ -142,7 +142,7 @@
i2c@118100 {
eeprom@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p3041ds.dts b/arch/powerpc/boot/dts/fsl/p3041ds.dts
index 40748e415adb..6f5f7283c533 100644
--- a/arch/powerpc/boot/dts/fsl/p3041ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p3041ds.dts
@@ -124,11 +124,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts
index 816b9788d5f6..65e20152e22f 100644
--- a/arch/powerpc/boot/dts/fsl/p4080ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts
@@ -125,11 +125,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsl/p5020ds.dts b/arch/powerpc/boot/dts/fsl/p5020ds.dts
index cd6f37386111..b24adf902d8d 100644
--- a/arch/powerpc/boot/dts/fsl/p5020ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p5020ds.dts
@@ -124,11 +124,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p5040ds.dts b/arch/powerpc/boot/dts/fsl/p5040ds.dts
index 45084738cf4e..30850b3228e0 100644
--- a/arch/powerpc/boot/dts/fsl/p5040ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p5040ds.dts
@@ -133,11 +133,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
index ec080bd01b09..db4139999b28 100644
--- a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
@@ -147,17 +147,17 @@
reg = <0x0>;
eeprom@50 {
- compatible = "at24,24c512";
+ compatible = "atmel,24c512";
reg = <0x50>;
};
eeprom@51 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x51>;
};
eeprom@57 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x57>;
};
@@ -174,7 +174,7 @@
reg = <0x1>;
eeprom@55 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x55>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t4240qds.dts
index 9573ceada07c..c0913ac5aaad 100644
--- a/arch/powerpc/boot/dts/fsl/t4240qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t4240qds.dts
@@ -377,27 +377,27 @@
reg = <0>;
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
eeprom@53 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x53>;
};
eeprom@54 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x54>;
};
eeprom@55 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x55>;
};
eeprom@56 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x56>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
index 8166c660712a..15eb0a3f7290 100644
--- a/arch/powerpc/boot/dts/fsl/t4240rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
@@ -130,15 +130,15 @@
reg = <0x2f>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
eeprom@54 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x54>;
};
eeprom@56 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x56>;
};
rtc@68 {
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
new file mode 100644
index 000000000000..475953ada707
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -0,0 +1,608 @@
+/*
+ * Device Tree Source for FSP2
+ *
+ * Copyright 2010,2012 IBM Corp.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+
+/dts-v1/;
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ model = "ibm,fsp2";
+ compatible = "ibm,fsp2";
+ dcr-parent = <&{/cpus/cpu@0}>;
+
+ aliases {
+ ethernet0 = &EMAC0;
+ ethernet1 = &EMAC1;
+ serial0 = &UART0;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ model = "PowerPC, 476FSP2";
+ reg = <0x0>;
+ clock-frequency = <0>; /* Filled in by cuboot */
+ timebase-frequency = <0>; /* Filled in by cuboot */
+ i-cache-line-size = <32>;
+ d-cache-line-size = <32>;
+ d-cache-size = <32768>;
+ i-cache-size = <32768>;
+ dcr-controller;
+ dcr-access-method = "native";
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by
+ cuboot */
+ };
+
+ clocks {
+ mmc_clk: mmc_clk {
+ compatible = "fixed-clock";
+ clock-frequency = <50000000>;
+ clock-output-names = "mmc_clk";
+ };
+ };
+
+ UIC0: uic0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <0>;
+ dcr-reg = <0x2c0 0x8>;
+ };
+
+ /* "interrupts" field is <bit level bit level>
+ first pair is non-critical, second is critical */
+ UIC1_0: uic1_0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <1>;
+ dcr-reg = <0x2c8 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <21 0x4 4 0x84>;
+ };
+
+ /* PSI and DMA */
+ UIC1_1: uic1_1 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <2>;
+ dcr-reg = <0x350 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <22 0x4 5 0x84>;
+ };
+
+ /* Ethernet and USB */
+ UIC1_2: uic1_2 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <3>;
+ dcr-reg = <0x358 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <23 0x4 6 0x84>;
+ };
+
+ /* PLB Errors */
+ UIC1_3: uic1_3 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <4>;
+ dcr-reg = <0x360 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <24 0x4 7 0x84>;
+ };
+
+ UIC1_4: uic1_4 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <5>;
+ dcr-reg = <0x368 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <25 0x4 8 0x84>;
+ };
+
+ UIC1_5: uic1_5 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <6>;
+ dcr-reg = <0x370 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <26 0x4 9 0x84>;
+ };
+
+ /* 2nd level UICs for FSI */
+ UIC2_0: uic2_0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <7>;
+ dcr-reg = <0x2d0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <16 0x4 0 0x84>;
+ };
+
+ UIC2_1: uic2_1 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <8>;
+ dcr-reg = <0x2d8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <17 0x4 1 0x84>;
+ };
+
+ UIC2_2: uic2_2 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <9>;
+ dcr-reg = <0x2e0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <18 0x4 2 0x84>;
+ };
+
+ UIC2_3: uic2_3 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <10>;
+ dcr-reg = <0x2e8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <19 0x4 3 0x84>;
+ };
+
+ UIC2_4: uic2_4 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <11>;
+ dcr-reg = <0x2f0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <20 0x4 4 0x84>;
+ };
+
+ UIC2_5: uic2_5 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <12>;
+ dcr-reg = <0x2f8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <21 0x4 5 0x84>;
+ };
+
+ UIC2_6: uic2_6 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <13>;
+ dcr-reg = <0x300 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <22 0x4 6 0x84>;
+ };
+
+ UIC2_7: uic2_7 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <14>;
+ dcr-reg = <0x308 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <23 0x4 7 0x84>;
+ };
+
+ UIC2_8: uic2_8 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <15>;
+ dcr-reg = <0x310 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <24 0x4 8 0x84>;
+ };
+
+ UIC2_9: uic2_9 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <16>;
+ dcr-reg = <0x318 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <25 0x4 9 0x84>;
+ };
+
+ UIC2_10: uic2_10 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <17>;
+ dcr-reg = <0x320 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <26 0x4 10 0x84>;
+ };
+
+ UIC2_11: uic2_11 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <18>;
+ dcr-reg = <0x328 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <27 0x4 11 0x84>;
+ };
+
+ UIC2_12: uic2_12 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <19>;
+ dcr-reg = <0x330 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <28 0x4 12 0x84>;
+ };
+
+ UIC2_13: uic2_13 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <20>;
+ dcr-reg = <0x338 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <29 0x4 13 0x84>;
+ };
+
+ UIC2_14: uic2_14 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <21>;
+ dcr-reg = <0x340 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <30 0x4 14 0x84>;
+ };
+
+ UIC2_15: uic2_15 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <22>;
+ dcr-reg = <0x348 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <31 0x4 15 0x84>;
+ };
+
+ mmc0: sdhci@020c0000 {
+ compatible = "st,sdhci-stih407", "st,sdhci";
+ status = "disabled";
+ reg = <0x020c0000 0x20000>;
+ reg-names = "mmc";
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <21 0x4 22 0x4>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <>;
+ clock-names = "mmc";
+ clocks = <&mmc_clk>;
+ };
+
+ plb6 {
+ compatible = "ibm,plb6";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges;
+
+ MCW0: memory-controller-wrapper {
+ compatible = "ibm,cw-476fsp2";
+ dcr-reg = <0x11111800 0x40>;
+ };
+
+ MCIF0: memory-controller {
+ compatible = "ibm,sdram-476fsp2", "ibm,sdram-4xx-ddr3";
+ dcr-reg = <0x11120000 0x10000>;
+ mcer-device = <&MCW0>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <10 0x84 /* ECC UE */
+ 11 0x84>; /* ECC CE */
+ };
+ };
+
+ plb4 {
+ compatible = "ibm,plb4";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x00000010 0x00000000 0x80000000
+ 0x80000000 0x00000010 0x80000000 0x80000000>;
+ clock-frequency = <333333334>;
+
+ plb6-system-hung-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <0 0x84>;
+ };
+
+ l2-error-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <20 0x84>;
+ };
+
+ plb6-plb4-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <1 0x84>;
+ };
+
+ plb4-ahb-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <20 0x84>;
+ };
+
+ opbd-error-irq {
+ compatible = "ibm,opbd-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_4>;
+ interrupts = <5 0x84>;
+ };
+
+ cmu-error-irq {
+ compatible = "ibm,cmu-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <28 0x84>;
+ };
+
+ conf-error-irq {
+ compatible = "ibm,conf-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_4>;
+ interrupts = <11 0x84>;
+ };
+
+ mc-ue-irq {
+ compatible = "ibm,mc-ue-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <10 0x84>;
+ };
+
+ reset-warning-irq {
+ compatible = "ibm,reset-warning-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <17 0x84>;
+ };
+
+ MAL0: mcmal0 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ compatible = "ibm,mcmal";
+ dcr-reg = <0x80 0x80>;
+ num-tx-chans = <1>;
+ num-rx-chans = <1>;
+ interrupt-parent = <&MAL0>;
+ interrupts = <0 1 2 3 4>;
+ /* index interrupt-parent interrupt# type */
+ interrupt-map = </*TXEOB*/ 0 &UIC1_2 4 0x4
+ /*RXEOB*/ 1 &UIC1_2 3 0x4
+ /*SERR*/ 2 &UIC1_2 7 0x4
+ /*TXDE*/ 3 &UIC1_2 6 0x4
+ /*RXDE*/ 4 &UIC1_2 5 0x4>;
+ };
+
+ MAL1: mcmal1 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ compatible = "ibm,mcmal";
+ dcr-reg = <0x100 0x80>;
+ num-tx-chans = <1>;
+ num-rx-chans = <1>;
+ interrupt-parent = <&MAL1>;
+ interrupts = <0 1 2 3 4>;
+ /* index interrupt-parent interrupt# type */
+ interrupt-map = </*TXEOB*/ 0 &UIC1_2 12 0x4
+ /*RXEOB*/ 1 &UIC1_2 11 0x4
+ /*SERR*/ 2 &UIC1_2 15 0x4
+ /*TXDE*/ 3 &UIC1_2 14 0x4
+ /*RXDE*/ 4 &UIC1_2 13 0x4>;
+ };
+
+ opb {
+ compatible = "ibm,opb";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges; // pass-thru to parent bus
+ clock-frequency = <83333334>;
+
+ EMAC0: ethernet@b0000000 {
+ linux,network-index = <0>;
+ device_type = "network";
+ compatible = "ibm,emac4sync";
+ has-inverted-stacr-oc;
+ interrupt-parent = <&UIC1_2>;
+ interrupts = <1 0x4 0 0x4>;
+ reg = <0xb0000000 0x100>;
+ local-mac-address = [000000000000]; /* Filled in by
+ cuboot */
+ mal-device = <&MAL0>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <0>;
+ max-frame-size = <1500>;
+ rx-fifo-size = <4096>;
+ tx-fifo-size = <4096>;
+ rx-fifo-size-gige = <16384>;
+ tx-fifo-size-gige = <8192>;
+ phy-address = <1>;
+ phy-mode = "rgmii";
+ phy-map = <00000003>;
+ rgmii-device = <&RGMII>;
+ rgmii-channel = <0>;
+ };
+
+ EMAC1: ethernet@b0000100 {
+ linux,network-index = <1>;
+ device_type = "network";
+ compatible = "ibm,emac4sync";
+ has-inverted-stacr-oc;
+ interrupt-parent = <&UIC1_2>;
+ interrupts = <9 0x4 8 0x4>;
+ reg = <0xb0000100 0x100>;
+ local-mac-address = [000000000000]; /* Filled in by
+ cuboot */
+ mal-device = <&MAL1>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <1>;
+ max-frame-size = <1500>;
+ rx-fifo-size = <4096>;
+ tx-fifo-size = <4096>;
+ rx-fifo-size-gige = <16384>;
+ tx-fifo-size-gige = <8192>;
+ phy-address = <2>;
+ phy-mode = "rgmii";
+ phy-map = <00000003>;
+ rgmii-device = <&RGMII>;
+ rgmii-channel = <1>;
+ };
+
+ RGMII: rgmii@b0000600 {
+ compatible = "ibm,rgmii";
+ has-mdio;
+ reg = <0xb0000600 0x8>;
+ };
+
+ UART0: serial@b0020000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0xb0020000 0x8>;
+ virtual-reg = <0xb0020000>;
+ clock-frequency = <20833333>;
+ current-speed = <115200>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <31 0x4>;
+ };
+ };
+
+ OHCI1: ohci@02040000 {
+ compatible = "ohci-le";
+ reg = <0x02040000 0xa0>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <28 0x8 29 0x8>;
+ };
+
+ OHCI2: ohci@02080000 {
+ compatible = "ohci-le";
+ reg = <0x02080000 0xa0>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <30 0x8 31 0x8>;
+ };
+
+ EHCI: ehci@02000000 {
+ compatible = "usb-ehci";
+ reg = <0x02000000 0xa4>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <23 0x4>;
+ };
+
+ };
+
+ chosen {
+ linux,stdout-path = "/plb/opb/serial@b0020000";
+ bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
+ };
+};
diff --git a/arch/powerpc/boot/dts/mpc5121ads.dts b/arch/powerpc/boot/dts/mpc5121ads.dts
index 75888ce2c792..1e81a7e32d18 100644
--- a/arch/powerpc/boot/dts/mpc5121ads.dts
+++ b/arch/powerpc/boot/dts/mpc5121ads.dts
@@ -9,7 +9,7 @@
* option) any later version.
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "mpc5121ads";
@@ -94,7 +94,7 @@
};
eeprom@50 {
- compatible = "at,24c32";
+ compatible = "atmel,24c32";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts
index 57f86cdf9f36..cab933b3957a 100644
--- a/arch/powerpc/boot/dts/mpc8308_p1m.dts
+++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts
@@ -123,7 +123,7 @@
interrupt-parent = <&ipic>;
dfsrr;
fram@50 {
- compatible = "ramtron,24c64";
+ compatible = "ramtron,24c64", "atmel,24c64";
reg = <0x50>;
};
};
diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts
index 90aed3ac2f69..648a85858eb5 100644
--- a/arch/powerpc/boot/dts/mpc8349emitx.dts
+++ b/arch/powerpc/boot/dts/mpc8349emitx.dts
@@ -92,7 +92,7 @@
dfsrr;
eeprom: at24@50 {
- compatible = "st,24c256";
+ compatible = "st,24c256", "atmel,24c256";
reg = <0x50>;
};
@@ -130,7 +130,7 @@
};
spd: at24@51 {
- compatible = "at24,spd";
+ compatible = "atmel,spd";
reg = <0x51>;
};
diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts
index e32613963ab0..5e85d8c93bca 100644
--- a/arch/powerpc/boot/dts/mpc8377_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -150,7 +150,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts
index c0c790168b96..fee15fcbb46f 100644
--- a/arch/powerpc/boot/dts/mpc8377_wlan.dts
+++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts
@@ -135,7 +135,7 @@
dfsrr;
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts
index 71842fcd621f..e973d61956b9 100644
--- a/arch/powerpc/boot/dts/mpc8378_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -150,7 +150,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts
index e442a29b2fe0..ed5d12ff2ee0 100644
--- a/arch/powerpc/boot/dts/mpc8379_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -148,7 +148,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/pcm030.dts b/arch/powerpc/boot/dts/pcm030.dts
index 192e66af0001..836e47cc4bed 100644
--- a/arch/powerpc/boot/dts/pcm030.dts
+++ b/arch/powerpc/boot/dts/pcm030.dts
@@ -71,7 +71,7 @@
reg = <0x51>;
};
eeprom@52 {
- compatible = "catalyst,24c32";
+ compatible = "catalyst,24c32", "atmel,24c32";
reg = <0x52>;
pagesize = <32>;
};
diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts
index 96b139bf50e9..576249bf2fb9 100644
--- a/arch/powerpc/boot/dts/pcm032.dts
+++ b/arch/powerpc/boot/dts/pcm032.dts
@@ -75,7 +75,7 @@
reg = <0x51>;
};
eeprom@52 {
- compatible = "catalyst,24c32";
+ compatible = "catalyst,24c32", "atmel,24c32";
reg = <0x52>;
pagesize = <32>;
};
diff --git a/arch/powerpc/boot/dts/pdm360ng.dts b/arch/powerpc/boot/dts/pdm360ng.dts
index 0cec7244abe7..445b88114009 100644
--- a/arch/powerpc/boot/dts/pdm360ng.dts
+++ b/arch/powerpc/boot/dts/pdm360ng.dts
@@ -13,7 +13,7 @@
* option) any later version.
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "pdm360ng";
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index b1d329246b08..e41b88a5eaee 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -229,7 +229,7 @@
};
partition@84000 {
label = "user";
- reg = <0x00000000 0x01f7c000>;
+ reg = <0x00084000 0x01f7c000>;
};
};
};
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index e576ee85c42f..ea9053ef4819 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -238,7 +238,7 @@
/* This will create 52 and 53 */
at24@52 {
- compatible = "at,24c04";
+ compatible = "atmel,24c04";
reg = <0x52>;
};
};
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index b03373d8b386..68e388ee94fe 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -67,13 +67,15 @@
#define MSR_LE 0x0000000000000001
#define FIXUP_ENDIAN \
- tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \
- b $+36; /* Skip trampoline if endian is good */ \
- .long 0x05009f42; /* bcl 20,31,$+4 */ \
- .long 0xa602487d; /* mflr r10 */ \
- .long 0x1c004a39; /* addi r10,r10,28 */ \
+ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
+ b $+44; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x00004039; /* li r10,0 */ \
+ .long 0x6401417d; /* mtmsrd r10,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
.long 0x2400004c /* rfid */
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
new file mode 100644
index 000000000000..e8e6a6999852
--- /dev/null
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -0,0 +1,126 @@
+CONFIG_44x=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+# CONFIG_FHANDLE is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_EMBEDDED=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_FSP2=y
+CONFIG_476FPE_ERR46=y
+CONFIG_SWIOTLB=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="ip=on rw"
+# CONFIG_SUSPEND is not set
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+CONFIG_VLAN_8021Q=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+# CONFIG_ATA_SFF is not set
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_IBM_EMAC=m
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_BOOKE_WDT=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_MMC=y
+CONFIG_MMC_DEBUG=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ARASAN=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_DEFAULT="n"
+CONFIG_XZ_DEC=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index c0deafc212b8..25d42bd3f114 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -74,6 +74,11 @@ do { \
___p1; \
})
+/*
+ * This must resolve to hwsync on SMP for the context switch path.
+ * See _switch, and core scheduler context switch memory ordering
+ * comments.
+ */
#define smp_mb__before_spinlock() smp_mb()
#include <asm-generic/barrier.h>
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index 33a24fdd7958..b750ffef83c7 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -206,68 +206,13 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
* Return the zero-based bit position (LE, not IBM bit numbering) of
* the most significant 1-bit in a double word.
*/
-static __inline__ __attribute__((const))
-int __ilog2(unsigned long x)
-{
- int lz;
+#define __ilog2(x) ilog2(x)
- asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
- return BITS_PER_LONG - 1 - lz;
-}
+#include <asm-generic/bitops/ffz.h>
-static inline __attribute__((const))
-int __ilog2_u32(u32 n)
-{
- int bit;
- asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
- return 31 - bit;
-}
+#include <asm-generic/bitops/builtin-__ffs.h>
-#ifdef __powerpc64__
-static inline __attribute__((const))
-int __ilog2_u64(u64 n)
-{
- int bit;
- asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
- return 63 - bit;
-}
-#endif
-
-/*
- * Determines the bit position of the least significant 0 bit in the
- * specified double word. The returned bit position will be
- * zero-based, starting from the right side (63/31 - 0).
- */
-static __inline__ unsigned long ffz(unsigned long x)
-{
- /* no zero exists anywhere in the 8 byte area. */
- if ((x = ~x) == 0)
- return BITS_PER_LONG;
-
- /*
- * Calculate the bit position of the least significant '1' bit in x
- * (since x has been changed this will actually be the least significant
- * '0' bit in * the original x). Note: (x & -x) gives us a mask that
- * is the least significant * (RIGHT-most) 1-bit of the value in x.
- */
- return __ilog2(x & -x);
-}
-
-static __inline__ unsigned long __ffs(unsigned long x)
-{
- return __ilog2(x & -x);
-}
-
-/*
- * ffs: find first bit set. This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
- unsigned long i = (unsigned long)x;
- return __ilog2(i & -i) + 1;
-}
+#include <asm-generic/bitops/builtin-ffs.h>
/*
* fls: find last (most-significant) bit set.
@@ -275,33 +220,15 @@ static __inline__ int ffs(int x)
*/
static __inline__ int fls(unsigned int x)
{
- int lz;
-
- asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
- return 32 - lz;
+ return 32 - __builtin_clz(x);
}
-static __inline__ unsigned long __fls(unsigned long x)
-{
- return __ilog2(x);
-}
+#include <asm-generic/bitops/builtin-__fls.h>
-/*
- * 64-bit can do this using one cntlzd (count leading zeroes doubleword)
- * instruction; for 32-bit we use the generic version, which does two
- * 32-bit fls calls.
- */
-#ifdef __powerpc64__
static __inline__ int fls64(__u64 x)
{
- int lz;
-
- asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x));
- return 64 - lz;
+ return 64 - __builtin_clzll(x);
}
-#else
-#include <asm-generic/bitops/fls64.h>
-#endif /* __powerpc64__ */
#ifdef CONFIG_PPC64
unsigned int __arch_hweight8(unsigned int w);
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index d310546e5d9d..a120e7f8d535 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -31,7 +31,8 @@ extern struct kmem_cache *pgtable_cache[];
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 26ed228d4dc6..7fb755880409 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -297,6 +297,8 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
pmd_t **pmdp);
+int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); }
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index b4b5e6b671ca..0c4e470571ca 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -8,7 +8,7 @@
#define H_PTE_INDEX_SIZE 9
#define H_PMD_INDEX_SIZE 7
#define H_PUD_INDEX_SIZE 9
-#define H_PGD_INDEX_SIZE 12
+#define H_PGD_INDEX_SIZE 9
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 4e957b027fe0..0ce513f2926f 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -89,6 +89,9 @@ static inline int hash__pgd_bad(pgd_t pgd)
{
return (pgd_val(pgd) == 0);
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void hash__mark_rodata_ro(void);
+#endif
extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge);
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 6666cd366596..5c28bd6f2ae1 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -50,4 +50,14 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
else
return entry;
}
+
+#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
+static inline bool gigantic_page_supported(void)
+{
+ if (radix_enabled())
+ return true;
+ return false;
+}
+#endif
+
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index cd5e7aa8cc34..20b1485ff1e8 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -53,10 +53,11 @@ extern void __tlb_remove_table(void *_table);
static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
{
#ifdef CONFIG_PPC_64K_PAGES
- return (pgd_t *)__get_free_page(PGALLOC_GFP);
+ return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
#else
struct page *page;
- page = alloc_pages(PGALLOC_GFP | __GFP_REPEAT, 4);
+ page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT),
+ 4);
if (!page)
return NULL;
return (pgd_t *) page_address(page);
@@ -76,7 +77,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
if (radix_enabled())
return radix__pgd_alloc(mm);
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,7 +95,8 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -119,7 +122,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
@@ -168,7 +172,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
struct page *page;
pte_t *pte;
- pte = pte_alloc_one_kernel(mm, address);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
if (!pte)
return NULL;
page = virt_to_page(pte);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 85bc9875c3be..c0737c86a362 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -5,6 +5,7 @@
#ifndef __ASSEMBLY__
#include <linux/mmdebug.h>
+#include <linux/bug.h>
#endif
/*
@@ -79,6 +80,9 @@
#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
+#define _PAGE_DEVMAP _RPAGE_SW1 /* software: ZONE_DEVICE page */
+#define __HAVE_ARCH_PTE_DEVMAP
+
/*
* Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE
* Instead of fixing all of them, add an alternate define which
@@ -599,6 +603,16 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
+static inline pte_t pte_mkdevmap(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
+}
+
+static inline int pte_devmap(pte_t pte)
+{
+ return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
+}
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
/* FIXME!! check whether this need to be a conditional */
@@ -1146,6 +1160,37 @@ static inline bool arch_needs_pgtable_deposit(void)
return true;
}
+
+static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_DEVMAP));
+}
+
+static inline int pmd_devmap(pmd_t pmd)
+{
+ return pte_devmap(pmd_pte(pmd));
+}
+
+static inline int pud_devmap(pud_t pud)
+{
+ return 0;
+}
+
+static inline int pgd_devmap(pgd_t pgd)
+{
+ return 0;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline const int pud_pfn(pud_t pud)
+{
+ /*
+ * Currently all calls to pud_pfn() are gated around a pud_devmap()
+ * check so this should never be used. If it grows another user we
+ * want to know about it.
+ */
+ BUILD_BUG();
+ return 0;
+}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index ac16d1943022..487709ff6875 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -116,6 +116,10 @@
#define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
#define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void radix__mark_rodata_ro(void);
+#endif
+
static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
unsigned long set)
{
@@ -252,7 +256,7 @@ static inline int radix__pgd_bad(pgd_t pgd)
static inline int radix__pmd_trans_huge(pmd_t pmd)
{
- return !!(pmd_val(pmd) & _PAGE_PTE);
+ return (pmd_val(pmd) & (_PAGE_PTE | _PAGE_DEVMAP)) == _PAGE_PTE;
}
static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index f2c562a0a427..0151af6c2a50 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -104,7 +104,7 @@
"1: "PPC_TLNEI" %4,0\n" \
_EMIT_BUG_ENTRY \
: : "i" (__FILE__), "i" (__LINE__), \
- "i" (BUGFLAG_TAINT(TAINT_WARN)), \
+ "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
"i" (sizeof(struct bug_entry)), \
"r" (__ret_warn_on)); \
} \
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index abef812de7f8..5482928eea1b 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -83,8 +83,16 @@ static inline unsigned long ppc_function_entry(void *func)
* On PPC64 ABIv1 the function pointer actually points to the
* function's descriptor. The first entry in the descriptor is the
* address of the function text.
+ *
+ * However, we may also receive pointer to an assembly symbol. To
+ * detect that, we first check if the function pointer we receive
+ * already points to kernel/module text and we only dereference it
+ * if it doesn't.
*/
- return ((func_descr_t *)func)->entry;
+ if (kernel_text_address((unsigned long)func))
+ return (unsigned long)func;
+ else
+ return ((func_descr_t *)func)->entry;
#else
return (unsigned long)func;
#endif
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 4f2df589ec1d..f256e1d14a14 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -109,7 +109,6 @@ struct compat_statfs {
int f_spare[4];
};
-#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff
#define COMPAT_RLIM_INFINITY 0xffffffff
typedef u32 compat_old_sigset_t;
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index c2d509584a98..d02ad93bf708 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -214,7 +214,6 @@ enum {
#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000)
#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
#ifndef __ASSEMBLY__
@@ -463,7 +462,7 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index f70cbfe0ec04..9f2ae0d25e15 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -56,6 +56,19 @@ static inline void ppc_msgsync(void)
: : "i" (CPU_FTR_HVMODE|CPU_FTR_ARCH_300));
}
+static inline void _ppc_msgclr(u32 msg)
+{
+ __asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGCLR(%1), PPC_MSGCLRP(%1), %0)
+ : : "i" (CPU_FTR_HVMODE), "r" (msg));
+}
+
+static inline void ppc_msgclr(enum ppc_dbell type)
+{
+ u32 msg = PPC_DBELL_TYPE(type);
+
+ _ppc_msgclr(msg);
+}
+
#else /* CONFIG_PPC_BOOK3S */
#define PPC_DBELL_MSGTYPE PPC_DBELL
diff --git a/arch/powerpc/include/asm/delay.h b/arch/powerpc/include/asm/delay.h
index 52e4d54da2a9..3df4417dd9c8 100644
--- a/arch/powerpc/include/asm/delay.h
+++ b/arch/powerpc/include/asm/delay.h
@@ -2,6 +2,7 @@
#define _ASM_POWERPC_DELAY_H
#ifdef __KERNEL__
+#include <linux/processor.h>
#include <asm/time.h>
/*
@@ -58,11 +59,18 @@ extern void udelay(unsigned long usecs);
typeof(condition) __ret; \
unsigned long __loops = tb_ticks_per_usec * timeout; \
unsigned long __start = get_tbl(); \
- while (!(__ret = (condition)) && (tb_ticks_since(__start) <= __loops)) \
- if (delay) \
+ \
+ if (delay) { \
+ while (!(__ret = (condition)) && \
+ (tb_ticks_since(__start) <= __loops)) \
udelay(delay); \
- else \
- cpu_relax(); \
+ } else { \
+ spin_begin(); \
+ while (!(__ret = (condition)) && \
+ (tb_ticks_since(__start) <= __loops)) \
+ spin_cpu_relax(); \
+ spin_end(); \
+ } \
if (!__ret) \
__ret = (condition); \
__ret; \
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
index 181a095468e4..eaece3d3e225 100644
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ b/arch/powerpc/include/asm/dma-mapping.h
@@ -17,10 +17,6 @@
#include <asm/io.h>
#include <asm/swiotlb.h>
-#ifdef CONFIG_PPC64
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-#endif
-
/* Some dma direct funcs must be visible for use in other dma_ops */
extern void *__dma_direct_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
@@ -116,7 +112,6 @@ static inline void set_dma_offset(struct device *dev, dma_addr_t off)
#define HAVE_ARCH_DMA_SET_MASK 1
extern int dma_set_mask(struct device *dev, u64 dma_mask);
-extern int __dma_set_mask(struct device *dev, u64 dma_mask);
extern u64 __dma_get_required_mask(struct device *dev);
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 183d73b6ed99..9a318973af05 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -36,20 +36,38 @@
*/
#include <asm/head-64.h>
+/* PACA save area offsets (exgen, exmc, etc) */
#define EX_R9 0
#define EX_R10 8
#define EX_R11 16
#define EX_R12 24
#define EX_R13 32
-#define EX_SRR0 40
-#define EX_DAR 48
-#define EX_DSISR 56
-#define EX_CCR 60
-#define EX_R3 64
-#define EX_LR 72
-#define EX_CFAR 80
-#define EX_PPR 88 /* SMT thread status register (priority) */
-#define EX_CTR 96
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#if defined(CONFIG_RELOCATABLE)
+#define EX_CTR 72
+#define EX_SIZE 10 /* size in u64 units */
+#else
+#define EX_SIZE 9 /* size in u64 units */
+#endif
+
+/*
+ * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
+ * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
+ * in the save area so it's not necessary to overlap them. Could be used
+ * for future savings though if another 4 byte register was to be saved.
+ */
+#define EX_LR EX_DAR
+
+/*
+ * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
+ * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
+ * with EX_DAR.
+ */
+#define EX_R3 EX_DAR
#ifdef CONFIG_RELOCATABLE
#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
@@ -236,6 +254,19 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
+/*
+ * Branch to label using its 0xC000 address. This results in instruction
+ * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
+ * on using mtmsr rather than rfid.
+ *
+ * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
+ * load KBASE for a slight optimisation.
+ */
+#define BRANCH_TO_C000(reg, label) \
+ __LOAD_HANDLER(reg, label); \
+ mtctr reg; \
+ bctr
+
#ifdef CONFIG_RELOCATABLE
#define BRANCH_TO_COMMON(reg, label) \
__LOAD_HANDLER(reg, label); \
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 60b91084f33c..ce88bbe1d809 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -43,6 +43,9 @@
#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
+ (0x1UL << 26))
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
/* Firmware provided dump sections */
@@ -200,6 +203,7 @@ struct fad_crash_memory_ranges {
unsigned long long size;
};
+extern int is_fadump_boot_memory_area(u64 addr, ulong size);
extern int early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data);
extern int fadump_reserve_mem(void);
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index 86eb87382031..d81eac5b509f 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -3,6 +3,7 @@
#include <asm/cache.h>
+#ifdef __ASSEMBLY__
/*
* We can't do CPP stringification and concatination directly into the section
* name for some reason, so these macros can do it for us.
@@ -49,8 +50,8 @@
* CLOSE_FIXED_SECTION() or elsewhere, there may be something
* unexpected being added there. Remove the '. = x_len' line, rebuild, and
* check what is pushing the section down.
- * - If the build dies in linking, check arch/powerpc/kernel/vmlinux.lds.S
- * for instructions.
+ * - If the build dies in linking, check arch/powerpc/tools/head_check.sh
+ * comments.
* - If the kernel crashes or hangs in very early boot, it could be linker
* stubs at the start of the main text.
*/
@@ -63,11 +64,29 @@
. = 0x0; \
start_##sname:
+/*
+ * .linker_stub_catch section is used to catch linker stubs from being
+ * inserted in our .text section, above the start_text label (which breaks
+ * the ABS_ADDR calculation). See kernel/vmlinux.lds.S and tools/head_check.sh
+ * for more details. We would prefer to just keep a cacheline (0x80), but
+ * 0x100 seems to be how the linker aligns branch stub groups.
+ */
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+#define OPEN_TEXT_SECTION(start) \
+ .section ".linker_stub_catch","ax",@progbits; \
+linker_stub_catch: \
+ . = 0x4; \
+ text_start = (start) + 0x100; \
+ .section ".text","ax",@progbits; \
+ .balign 0x100; \
+start_text:
+#else
#define OPEN_TEXT_SECTION(start) \
text_start = (start); \
.section ".text","ax",@progbits; \
. = 0x0; \
start_text:
+#endif
#define ZERO_FIXED_SECTION(sname, start, end) \
sname##_start = (start); \
@@ -397,4 +416,6 @@ name:
EXC_COMMON_BEGIN(name); \
STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr); \
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index d73755fafbb0..57d38b504ff7 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -295,6 +295,8 @@
#define H_DISABLE_ALL_VIO_INTS 0x0A
#define H_DISABLE_VIO_INTERRUPT 0x0B
#define H_ENABLE_VIO_INTERRUPT 0x0C
+#define H_GET_SESSION_TOKEN 0x19
+#define H_SESSION_ERR_DETECTED 0x1A
/* Platform specific hcalls, used by KVM */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index eba60416536e..c1dd1929342d 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -129,6 +129,10 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
}
extern bool prep_irq_for_idle(void);
+extern bool prep_irq_for_idle_irqsoff(void);
+extern void irq_set_pending_from_srr1(unsigned long srr1);
+
+#define fini_irq_for_idle_irqsoff() trace_hardirqs_off();
extern void force_external_irq_replay(void);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 8a8ce220d7d0..20febe0b7f32 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -139,6 +139,8 @@ struct scatterlist;
#ifdef CONFIG_PPC64
+#define IOMMU_MAPPING_ERROR (~(dma_addr_t)0x0)
+
static inline void set_iommu_table_base(struct device *dev,
struct iommu_table *base)
{
@@ -238,6 +240,8 @@ static inline int __init tce_iommu_bus_notifier_init(void)
}
#endif /* !CONFIG_IOMMU_API */
+int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
#else
static inline void *get_iommu_table_base(struct device *dev)
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index a83821f33ea3..8814a7249ceb 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
+extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 2bf35017ffc0..b8d5b8e35244 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -86,7 +86,6 @@ struct kvmppc_vcore {
u16 last_cpu;
u8 vcore_state;
u8 in_guest;
- struct kvmppc_vcore *master_vcore;
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
struct list_head preempt_list;
spinlock_t lock;
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index b148496ffe36..7cea76f11c26 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -81,7 +81,7 @@ struct kvm_split_mode {
u8 subcore_size;
u8 do_nap;
u8 napped[MAX_SMT_THREADS];
- struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
+ struct kvmppc_vcore *vc[MAX_SUBCORES];
};
/*
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9c51ac4b8f36..8b3f1238d07f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -35,6 +35,7 @@
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/hvcall.h>
+#include <asm/mce.h>
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
@@ -52,8 +53,8 @@
#define KVM_IRQCHIP_NUM_PINS 256
/* PPC-specific vcpu->requests bit members */
-#define KVM_REQ_WATCHDOG 8
-#define KVM_REQ_EPR_EXIT 9
+#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
+#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
#include <linux/mmu_notifier.h>
@@ -267,6 +268,8 @@ struct kvm_resize_hpt;
struct kvm_arch {
unsigned int lpid;
+ unsigned int smt_mode; /* # vcpus per virtual core */
+ unsigned int emul_smt_mode; /* emualted SMT mode, on P9 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned int tlb_sets;
struct kvm_hpt_info hpt;
@@ -285,6 +288,7 @@ struct kvm_arch {
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
u8 radix;
+ u8 fwnmi_enabled;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
@@ -566,6 +570,7 @@ struct kvm_vcpu_arch {
ulong wort;
ulong tid;
ulong psscr;
+ ulong hfscr;
ulong shadow_srr1;
#endif
u32 vrsave; /* also USPRG0 */
@@ -579,7 +584,7 @@ struct kvm_vcpu_arch {
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
- u32 dec;
+ ulong dec;
#ifdef CONFIG_BOOKE
u32 decar;
#endif
@@ -710,6 +715,7 @@ struct kvm_vcpu_arch {
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
+ u8 doorbell_request;
u32 last_inst;
struct swait_queue_head *wqp;
@@ -722,6 +728,7 @@ struct kvm_vcpu_arch {
int prev_cpu;
bool timer_running;
wait_queue_head_t cpu_run;
+ struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
struct kvm_vcpu_arch_shared *shared;
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e0d88c38602b..ba5fadd6f3c9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -315,6 +315,8 @@ struct kvmppc_ops {
struct irq_bypass_producer *);
int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+ int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
+ unsigned long flags);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index f90b22c722e1..cd2fc1cc1cc7 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -226,6 +226,7 @@ struct machdep_calls {
extern void e500_idle(void);
extern void power4_idle(void);
extern void power7_idle(void);
+extern void power9_idle(void);
extern void ppc6xx_idle(void);
extern void book3e_idle(void);
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 81eff8631434..190d69a7f701 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -90,13 +90,14 @@ enum MCE_UserErrorType {
enum MCE_RaErrorType {
MCE_RA_ERROR_INDETERMINATE = 0,
MCE_RA_ERROR_IFETCH = 1,
- MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
- MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 3,
- MCE_RA_ERROR_LOAD = 4,
- MCE_RA_ERROR_STORE = 5,
- MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 6,
- MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 7,
- MCE_RA_ERROR_LOAD_STORE_FOREIGN = 8,
+ MCE_RA_ERROR_IFETCH_FOREIGN = 2,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 3,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 4,
+ MCE_RA_ERROR_LOAD = 5,
+ MCE_RA_ERROR_STORE = 6,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 7,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 8,
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN = 9,
};
enum MCE_LinkErrorType {
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 633139291a48..cc369a70f2bb 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -31,7 +31,8 @@ extern struct kmem_cache *pgtable_cache[];
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 5134ade2e850..91314268f04f 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -340,6 +340,8 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm,
extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
pmd_t **pmdp);
+int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 897d2e1c8a9b..9721c7867b9c 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -43,7 +43,8 @@ extern struct kmem_cache *pgtable_cache[];
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -57,7 +58,8 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
@@ -96,7 +98,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
struct page *page;
pte_t *pte;
- pte = pte_alloc_one_kernel(mm, address);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
if (!pte)
return NULL;
page = virt_to_page(pte);
@@ -189,7 +191,8 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), GFP_KERNEL);
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index cb3e6242a78c..ef930ba500f9 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -667,12 +667,14 @@ enum {
enum {
OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
- OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2
+ OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2,
+ OPAL_PHB_ERROR_DATA_TYPE_PHB4 = 3
};
enum {
OPAL_P7IOC_NUM_PEST_REGS = 128,
- OPAL_PHB3_NUM_PEST_REGS = 256
+ OPAL_PHB3_NUM_PEST_REGS = 256,
+ OPAL_PHB4_NUM_PEST_REGS = 512
};
struct OpalIoPhbErrorCommon {
@@ -802,6 +804,75 @@ struct OpalIoPhb3ErrorData {
__be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
};
+struct OpalIoPhb4ErrorData {
+ struct OpalIoPhbErrorCommon common;
+
+ __be32 brdgCtl;
+
+ /* PHB4 cfg regs */
+ __be32 deviceStatus;
+ __be32 slotStatus;
+ __be32 linkStatus;
+ __be32 devCmdStatus;
+ __be32 devSecStatus;
+
+ /* cfg AER regs */
+ __be32 rootErrorStatus;
+ __be32 uncorrErrorStatus;
+ __be32 corrErrorStatus;
+ __be32 tlpHdr1;
+ __be32 tlpHdr2;
+ __be32 tlpHdr3;
+ __be32 tlpHdr4;
+ __be32 sourceId;
+
+ /* PHB4 ETU Error Regs */
+ __be64 nFir; /* 000 */
+ __be64 nFirMask; /* 003 */
+ __be64 nFirWOF; /* 008 */
+ __be64 phbPlssr; /* 120 */
+ __be64 phbCsr; /* 110 */
+ __be64 lemFir; /* C00 */
+ __be64 lemErrorMask; /* C18 */
+ __be64 lemWOF; /* C40 */
+ __be64 phbErrorStatus; /* C80 */
+ __be64 phbFirstErrorStatus; /* C88 */
+ __be64 phbErrorLog0; /* CC0 */
+ __be64 phbErrorLog1; /* CC8 */
+ __be64 phbTxeErrorStatus; /* D00 */
+ __be64 phbTxeFirstErrorStatus; /* D08 */
+ __be64 phbTxeErrorLog0; /* D40 */
+ __be64 phbTxeErrorLog1; /* D48 */
+ __be64 phbRxeArbErrorStatus; /* D80 */
+ __be64 phbRxeArbFirstErrorStatus; /* D88 */
+ __be64 phbRxeArbErrorLog0; /* DC0 */
+ __be64 phbRxeArbErrorLog1; /* DC8 */
+ __be64 phbRxeMrgErrorStatus; /* E00 */
+ __be64 phbRxeMrgFirstErrorStatus; /* E08 */
+ __be64 phbRxeMrgErrorLog0; /* E40 */
+ __be64 phbRxeMrgErrorLog1; /* E48 */
+ __be64 phbRxeTceErrorStatus; /* E80 */
+ __be64 phbRxeTceFirstErrorStatus; /* E88 */
+ __be64 phbRxeTceErrorLog0; /* EC0 */
+ __be64 phbRxeTceErrorLog1; /* EC8 */
+
+ /* PHB4 REGB Error Regs */
+ __be64 phbPblErrorStatus; /* 1900 */
+ __be64 phbPblFirstErrorStatus; /* 1908 */
+ __be64 phbPblErrorLog0; /* 1940 */
+ __be64 phbPblErrorLog1; /* 1948 */
+ __be64 phbPcieDlpErrorLog1; /* 1AA0 */
+ __be64 phbPcieDlpErrorLog2; /* 1AA8 */
+ __be64 phbPcieDlpErrorStatus; /* 1AB0 */
+ __be64 phbRegbErrorStatus; /* 1C00 */
+ __be64 phbRegbFirstErrorStatus; /* 1C08 */
+ __be64 phbRegbErrorLog0; /* 1C40 */
+ __be64 phbRegbErrorLog1; /* 1C48 */
+
+ __be64 pestA[OPAL_PHB4_NUM_PEST_REGS];
+ __be64 pestB[OPAL_PHB4_NUM_PEST_REGS];
+};
+
enum {
OPAL_REINIT_CPUS_HILE_BE = (1 << 0),
OPAL_REINIT_CPUS_HILE_LE = (1 << 1),
@@ -877,6 +948,7 @@ enum {
OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2,
OPAL_PHB_CAPI_MODE_SNOOP_ON = 3,
OPAL_PHB_CAPI_MODE_DMA = 4,
+ OPAL_PHB_CAPI_MODE_DMA_TVT1 = 5,
};
/* OPAL I2C request */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1c09f8fe2ee8..dc88a31cc79a 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -21,7 +21,11 @@
#include <asm/lppaca.h>
#include <asm/mmu.h>
#include <asm/page.h>
+#ifdef CONFIG_PPC_BOOK3E
#include <asm/exception-64e.h>
+#else
+#include <asm/exception-64s.h>
+#endif
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_book3s_asm.h>
#endif
@@ -98,8 +102,8 @@ struct paca_struct {
* Now, starting in cacheline 2, the exception save areas
*/
/* used for most interrupts/exceptions */
- u64 exgen[13] __attribute__((aligned(0x80)));
- u64 exslb[13]; /* used for SLB/segment table misses
+ u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
+ u64 exslb[EX_SIZE]; /* used for SLB/segment table misses
* on the linear mapping */
/* SLB related definitions */
u16 vmalloc_sllp;
@@ -177,12 +181,14 @@ struct paca_struct {
* to the sibling threads' paca.
*/
struct paca_struct **thread_sibling_pacas;
+ /* The PSSCR value that the kernel requested before going to stop */
+ u64 requested_psscr;
#endif
#ifdef CONFIG_PPC_STD_MMU_64
/* Non-maskable exceptions that are not performance critical */
- u64 exnmi[13]; /* used for system reset (nmi) */
- u64 exmc[13]; /* used for machine checks */
+ u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */
+ u64 exmc[EX_SIZE]; /* used for machine checks */
#endif
#ifdef CONFIG_PPC_BOOK3S_64
/* Exclusive stacks for system reset and machine check exception. */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 0413457ba11d..d795c5d5789c 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -3,6 +3,20 @@
#include <linux/mm.h>
+#ifndef MODULE
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
+{
+ if (unlikely(mm == &init_mm))
+ return gfp;
+ return gfp | __GFP_ACCOUNT;
+}
+#else /* !MODULE */
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
+{
+ return gfp | __GFP_ACCOUNT;
+}
+#endif /* MODULE */
+
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 3a8d278e7421..fa9ebaead91e 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -103,6 +103,8 @@
#define OP_31_XOP_STBUX 247
#define OP_31_XOP_LHZX 279
#define OP_31_XOP_LHZUX 311
+#define OP_31_XOP_MSGSNDP 142
+#define OP_31_XOP_MSGCLRP 174
#define OP_31_XOP_MFSPR 339
#define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343
@@ -189,8 +191,7 @@
/* sorted alphabetically */
#define PPC_INST_BHRBE 0x7c00025c
#define PPC_INST_CLRBHRB 0x7c00035c
-#define PPC_INST_COPY 0x7c00060c
-#define PPC_INST_COPY_FIRST 0x7c20060c
+#define PPC_INST_COPY 0x7c20060c
#define PPC_INST_CP_ABORT 0x7c00068c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
@@ -221,10 +222,10 @@
#define PPC_INST_MSGCLR 0x7c0001dc
#define PPC_INST_MSGSYNC 0x7c0006ec
#define PPC_INST_MSGSNDP 0x7c00011c
+#define PPC_INST_MSGCLRP 0x7c00015c
#define PPC_INST_MTTMR 0x7c0003dc
#define PPC_INST_NOP 0x60000000
-#define PPC_INST_PASTE 0x7c00070c
-#define PPC_INST_PASTE_LAST 0x7c20070d
+#define PPC_INST_PASTE 0x7c20070d
#define PPC_INST_POPCNTB 0x7c0000f4
#define PPC_INST_POPCNTB_MASK 0xfc0007fe
#define PPC_INST_POPCNTD 0x7c0003f4
@@ -392,6 +393,8 @@
/* Deal with instructions that older assemblers aren't aware of */
#define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT)
+#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \
+ ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
__PPC_RA(a) | __PPC_RB(b))
#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
@@ -409,6 +412,8 @@
___PPC_RB(b))
#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \
___PPC_RB(b))
+#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_INST_MSGCLRP | \
+ ___PPC_RB(b))
#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
__PPC_RA(a) | __PPC_RS(s))
#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 359c44341761..6baeeb9acd0d 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -770,15 +770,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
#else
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
- b $+36; /* Skip trampoline if endian is good */ \
- .long 0x05009f42; /* bcl 20,31,$+4 */ \
- .long 0xa602487d; /* mflr r10 */ \
- .long 0x1c004a39; /* addi r10,r10,28 */ \
+ b $+44; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x00004039; /* li r10,0 */ \
+ .long 0x6401417d; /* mtmsrd r10,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
.long 0x2400004c /* rfid */
+
#endif /* !CONFIG_PPC_BOOK3E */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a2123f291ab0..fab7ff877304 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -110,13 +110,18 @@ void release_thread(struct task_struct *);
#define TASK_SIZE_128TB (0x0000800000000000UL)
#define TASK_SIZE_512TB (0x0002000000000000UL)
-#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
/*
* Max value currently used:
*/
-#define TASK_SIZE_USER64 TASK_SIZE_512TB
+#define TASK_SIZE_USER64 TASK_SIZE_512TB
+#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
#else
-#define TASK_SIZE_USER64 TASK_SIZE_64TB
+#define TASK_SIZE_USER64 TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
#endif
/*
@@ -132,7 +137,7 @@ void release_thread(struct task_struct *);
* space during mmap's.
*/
#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
@@ -143,21 +148,15 @@ void release_thread(struct task_struct *);
* with 128TB and conditionally enable upto 512TB
*/
#ifdef CONFIG_PPC_BOOK3S_64
-#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
- TASK_SIZE_USER32 : TASK_SIZE_128TB)
+#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
+ TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
#else
#define DEFAULT_MAP_WINDOW TASK_SIZE
#endif
#ifdef __powerpc64__
-#ifdef CONFIG_PPC_BOOK3S_64
-/* Limit stack to 128TB */
-#define STACK_TOP_USER64 TASK_SIZE_128TB
-#else
-#define STACK_TOP_USER64 TASK_SIZE_USER64
-#endif
-
+#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
#define STACK_TOP_USER32 TASK_SIZE_USER32
#define STACK_TOP (is_32bit_task() ? \
@@ -379,12 +378,6 @@ struct thread_struct {
}
#endif
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk) \
- ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
-
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs)
unsigned long get_wchan(struct task_struct *p);
@@ -428,6 +421,26 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#ifdef CONFIG_PPC64
#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0)
+
+#define spin_begin() HMT_low()
+
+#define spin_cpu_relax() barrier()
+
+#define spin_cpu_yield() spin_cpu_relax()
+
+#define spin_end() HMT_medium()
+
+#define spin_until_cond(cond) \
+do { \
+ if (unlikely(!(cond))) { \
+ spin_begin(); \
+ do { \
+ spin_cpu_relax(); \
+ } while (!(cond)); \
+ spin_end(); \
+ } \
+} while (0)
+
#else
#define cpu_relax() barrier()
#endif
@@ -481,11 +494,11 @@ extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-extern unsigned long power7_nap(int check_irq);
-extern unsigned long power7_sleep(void);
-extern unsigned long power7_winkle(void);
-extern unsigned long power9_idle_stop(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask);
+extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
+extern void power7_idle_type(unsigned long type);
+extern unsigned long power9_idle_stop(unsigned long psscr_val);
+extern void power9_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask);
extern void flush_instruction_cache(void);
extern void hard_reset_now(void);
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 8b3b46b7b0f2..dc4e15937ccf 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -43,9 +43,24 @@ extern void __init dump_numa_cpu_topology(void);
extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
+extern int numa_update_cpu_topology(bool cpus_locked);
+static inline int early_cpu_to_node(int cpu)
+{
+ int nid;
+
+ nid = numa_cpu_lookup_table[cpu];
+
+ /*
+ * Fall back to node 0 if nid is unset (it should be, except bugs).
+ * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
+ */
+ return (nid < 0) ? 0 : nid;
+}
#else
+static inline int early_cpu_to_node(int cpu) { return 0; }
+
static inline void dump_numa_cpu_topology(void) {}
static inline int sysfs_add_device_to_node(struct device *dev, int nid)
@@ -57,6 +72,11 @@ static inline void sysfs_remove_device_from_node(struct device *dev,
int nid)
{
}
+
+static inline int numa_update_cpu_topology(bool cpus_locked)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index c05cef6ee06c..18f168aebae3 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -168,6 +168,39 @@ TRACE_EVENT(hash_fault,
__entry->addr, __entry->access, __entry->trap)
);
+
+TRACE_EVENT(tlbie,
+
+ TP_PROTO(unsigned long lpid, unsigned long local, unsigned long rb,
+ unsigned long rs, unsigned long ric, unsigned long prs,
+ unsigned long r),
+ TP_ARGS(lpid, local, rb, rs, ric, prs, r),
+ TP_STRUCT__entry(
+ __field(unsigned long, lpid)
+ __field(unsigned long, local)
+ __field(unsigned long, rb)
+ __field(unsigned long, rs)
+ __field(unsigned long, ric)
+ __field(unsigned long, prs)
+ __field(unsigned long, r)
+ ),
+
+ TP_fast_assign(
+ __entry->lpid = lpid;
+ __entry->local = local;
+ __entry->rb = rb;
+ __entry->rs = rs;
+ __entry->ric = ric;
+ __entry->prs = prs;
+ __entry->r = r;
+ ),
+
+ TP_printk("lpid=%ld, local=%ld, rb=0x%lx, rs=0x%lx, ric=0x%lx, "
+ "prs=0x%lx, r=0x%lx", __entry->lpid, __entry->local,
+ __entry->rb, __entry->rs, __entry->ric, __entry->prs,
+ __entry->r)
+);
+
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 5c0d8a8cdae5..4cf57f2126e6 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -267,13 +267,7 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
-#ifndef __powerpc64__
-
-#define INLINE_COPY_FROM_USER
-#define INLINE_COPY_TO_USER
-
-#else /* __powerpc64__ */
-
+#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
@@ -346,7 +340,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
}
extern long strncpy_from_user(char *dst, const char __user *src, long count);
-extern __must_check long strlen_user(const char __user *str);
extern __must_check long strnlen_user(const char __user *str, long n);
#endif /* _ARCH_POWERPC_UACCESS_H */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index c8a822acf962..c23ff4389ca2 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -94,11 +94,13 @@ struct xive_q {
* store at 0 and some ESBs support doing a trigger via a
* separate trigger page.
*/
-#define XIVE_ESB_GET 0x800
-#define XIVE_ESB_SET_PQ_00 0xc00
-#define XIVE_ESB_SET_PQ_01 0xd00
-#define XIVE_ESB_SET_PQ_10 0xe00
-#define XIVE_ESB_SET_PQ_11 0xf00
+#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
+#define XIVE_ESB_GET 0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index b15bf6bc0e94..0d960ef78a9a 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,2 +1,8 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += param.h
+generic-y += poll.h
+generic-y += resource.h
+generic-y += sockios.h
+generic-y += statfs.h
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 49a25796a61a..bfd609a3e928 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -100,6 +100,7 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IOR('T', 0x41, int) /* Safely open the slave */
#define TIOCSERCONFIG 0x5453
#define TIOCSERGWILD 0x5454
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 07fbeb927834..8cf8f0c96906 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -60,6 +60,12 @@ struct kvm_regs {
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
+/* flags for kvm_run.flags */
+#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0)
+#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0)
+#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0)
+#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0)
+
/*
* Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
diff --git a/arch/powerpc/include/uapi/asm/param.h b/arch/powerpc/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/powerpc/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/powerpc/include/uapi/asm/poll.h b/arch/powerpc/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/powerpc/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/powerpc/include/uapi/asm/resource.h b/arch/powerpc/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/powerpc/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 58e2ec0310fc..3c590c7c42c0 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -8,28 +8,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/sockios.h>
-
-/* For setsockopt(2) */
-#define SOL_SOCKET 1
-
-#define SO_DEBUG 1
-#define SO_REUSEADDR 2
-#define SO_TYPE 3
-#define SO_ERROR 4
-#define SO_DONTROUTE 5
-#define SO_BROADCAST 6
-#define SO_SNDBUF 7
-#define SO_RCVBUF 8
-#define SO_SNDBUFFORCE 32
-#define SO_RCVBUFFORCE 33
-#define SO_KEEPALIVE 9
-#define SO_OOBINLINE 10
-#define SO_NO_CHECK 11
-#define SO_PRIORITY 12
-#define SO_LINGER 13
-#define SO_BSDCOMPAT 14
-#define SO_REUSEPORT 15
#define SO_RCVLOWAT 16
#define SO_SNDLOWAT 17
#define SO_RCVTIMEO 18
@@ -37,72 +15,6 @@
#define SO_PASSCRED 20
#define SO_PEERCRED 21
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION 22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
-#define SO_SECURITY_ENCRYPTION_NETWORK 24
-
-#define SO_BINDTODEVICE 25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER 26
-#define SO_DETACH_FILTER 27
-#define SO_GET_FILTER SO_ATTACH_FILTER
-
-#define SO_PEERNAME 28
-#define SO_TIMESTAMP 29
-#define SCM_TIMESTAMP SO_TIMESTAMP
-
-#define SO_ACCEPTCONN 30
-
-#define SO_PEERSEC 31
-#define SO_PASSSEC 34
-#define SO_TIMESTAMPNS 35
-#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
-
-#define SO_MARK 36
-
-#define SO_TIMESTAMPING 37
-#define SCM_TIMESTAMPING SO_TIMESTAMPING
-
-#define SO_PROTOCOL 38
-#define SO_DOMAIN 39
-
-#define SO_RXQ_OVFL 40
-
-#define SO_WIFI_STATUS 41
-#define SCM_WIFI_STATUS SO_WIFI_STATUS
-#define SO_PEEK_OFF 42
-
-/* Instruct lower device to use last 4-bytes of skb data as FCS */
-#define SO_NOFCS 43
-
-#define SO_LOCK_FILTER 44
-
-#define SO_SELECT_ERR_QUEUE 45
-
-#define SO_BUSY_POLL 46
-
-#define SO_MAX_PACING_RATE 47
-
-#define SO_BPF_EXTENSIONS 48
-
-#define SO_INCOMING_CPU 49
-
-#define SO_ATTACH_BPF 50
-#define SO_DETACH_BPF SO_DETACH_FILTER
-
-#define SO_ATTACH_REUSEPORT_CBPF 51
-#define SO_ATTACH_REUSEPORT_EBPF 52
-
-#define SO_CNX_ADVICE 53
-
-#define SCM_TIMESTAMPING_OPT_STATS 54
-
-#define SO_MEMINFO 55
-
-#define SO_INCOMING_NAPI_ID 56
-
-#define SO_COOKIE 57
+#include <asm-generic/socket.h>
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/sockios.h b/arch/powerpc/include/uapi/asm/sockios.h
deleted file mode 100644
index 55cef7675a31..000000000000
--- a/arch/powerpc/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_POWERPC_SOCKIOS_H
-#define _ASM_POWERPC_SOCKIOS_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* _ASM_POWERPC_SOCKIOS_H */
diff --git a/arch/powerpc/include/uapi/asm/statfs.h b/arch/powerpc/include/uapi/asm/statfs.h
deleted file mode 100644
index 5244834583a4..000000000000
--- a/arch/powerpc/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_POWERPC_STATFS_H
-#define _ASM_POWERPC_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index e132902e1f14..0845eebc5af3 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -25,8 +25,6 @@ CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-# timers used by tracing
-CFLAGS_REMOVE_time.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
endif
obj-y := cputable.o ptrace.o syscalls.o \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 709e23425317..6e95c2c19a7e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -100,12 +100,12 @@ int main(void)
OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
#endif
OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode);
- OFFSET(THREAD_FPSTATE, thread_struct, fp_state);
+ OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr);
OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area);
OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr);
OFFSET(THREAD_LOAD_FP, thread_struct, load_fp);
#ifdef CONFIG_ALTIVEC
- OFFSET(THREAD_VRSTATE, thread_struct, vr_state);
+ OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
OFFSET(THREAD_VRSAVE, thread_struct, vrsave);
OFFSET(THREAD_USED_VR, thread_struct, used_vr);
@@ -145,9 +145,9 @@ int main(void)
OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr);
OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr);
OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs);
- OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state);
+ OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
- OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state);
+ OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
/* Local pt_regs on stack for Transactional Memory funcs. */
DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
sizeof(struct pt_regs) + 16);
@@ -485,6 +485,7 @@ int main(void)
OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
+ OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
@@ -513,6 +514,7 @@ int main(void)
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+ OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
@@ -542,6 +544,7 @@ int main(void)
OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
+ OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
@@ -742,9 +745,11 @@ int main(void)
OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
+ OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+ DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
#ifdef CONFIG_PPC_8xx
DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index fb7cbaa37658..8f7abf9baa63 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -105,6 +105,11 @@ static u64 dma_iommu_get_required_mask(struct device *dev)
return mask;
}
+int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr == IOMMU_MAPPING_ERROR;
+}
+
struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
@@ -115,5 +120,6 @@ struct dma_map_ops dma_iommu_ops = {
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
+ .mapping_error = dma_iommu_mapping_error,
};
EXPORT_SYMBOL(dma_iommu_ops);
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 41c749586bd2..4194bbbbdb10 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -314,18 +314,6 @@ EXPORT_SYMBOL(dma_set_coherent_mask);
#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-int __dma_set_mask(struct device *dev, u64 dma_mask)
-{
- const struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
- return dma_ops->set_dma_mask(dev, dma_mask);
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
- *dev->dma_mask = dma_mask;
- return 0;
-}
-
int dma_set_mask(struct device *dev, u64 dma_mask)
{
if (ppc_md.dma_set_mask)
@@ -338,7 +326,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
return phb->controller_ops.dma_set_mask(pdev, dma_mask);
}
- return __dma_set_mask(dev, dma_mask);
+ if (!dev->dma_mask || !dma_supported(dev, dma_mask))
+ return -EIO;
+ *dev->dma_mask = dma_mask;
+ return 0;
}
EXPORT_SYMBOL(dma_set_mask);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index fcc7588a96d6..4c7656dc4e04 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -8,6 +8,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/jump_label.h>
+#include <linux/libfdt.h>
#include <linux/memblock.h>
#include <linux/printk.h>
#include <linux/sched.h>
@@ -642,7 +643,6 @@ static struct dt_cpu_feature_match __initdata
{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
{"processor-utilization-of-resources-register", feat_enable_purr, 0},
- {"subcore", feat_enable, CPU_FTR_SUBCORE},
{"no-execute", feat_enable, 0},
{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
@@ -671,12 +671,24 @@ static struct dt_cpu_feature_match __initdata
{"wait-v3", feat_enable, 0},
};
-/* XXX: how to configure this? Default + boot time? */
-#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN
-#define CPU_FEATURE_ENABLE_UNKNOWN 1
-#else
-#define CPU_FEATURE_ENABLE_UNKNOWN 0
-#endif
+static bool __initdata using_dt_cpu_ftrs;
+static bool __initdata enable_unknown = true;
+
+static int __init dt_cpu_ftrs_parse(char *str)
+{
+ if (!str)
+ return 0;
+
+ if (!strcmp(str, "off"))
+ using_dt_cpu_ftrs = false;
+ else if (!strcmp(str, "known"))
+ enable_unknown = false;
+ else
+ return 1;
+
+ return 0;
+}
+early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
static void __init cpufeatures_setup_start(u32 isa)
{
@@ -707,7 +719,7 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
}
}
- if (!known && CPU_FEATURE_ENABLE_UNKNOWN) {
+ if (!known && enable_unknown) {
if (!feat_try_enable_unknown(f)) {
pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
f->name);
@@ -756,6 +768,26 @@ static void __init cpufeatures_setup_finished(void)
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
}
+static int __init disabled_on_cmdline(void)
+{
+ unsigned long root, chosen;
+ const char *p;
+
+ root = of_get_flat_dt_root();
+ chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+ if (chosen == -FDT_ERR_NOTFOUND)
+ return false;
+
+ p = of_get_flat_dt_prop(chosen, "bootargs", NULL);
+ if (!p)
+ return false;
+
+ if (strstr(p, "dt_cpu_ftrs=off"))
+ return true;
+
+ return false;
+}
+
static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
int depth, void *data)
{
@@ -766,8 +798,6 @@ static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
return 0;
}
-static bool __initdata using_dt_cpu_ftrs = false;
-
bool __init dt_cpu_ftrs_in_use(void)
{
return using_dt_cpu_ftrs;
@@ -775,6 +805,8 @@ bool __init dt_cpu_ftrs_in_use(void)
bool __init dt_cpu_ftrs_init(void *fdt)
{
+ using_dt_cpu_ftrs = false;
+
/* Setup and verify the FDT, if it fails we just bail */
if (!early_init_dt_verify(fdt))
return false;
@@ -782,6 +814,9 @@ bool __init dt_cpu_ftrs_init(void *fdt)
if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
return false;
+ if (disabled_on_cmdline())
+ return false;
+
cpufeatures_setup_cpu();
using_dt_cpu_ftrs = true;
@@ -1027,5 +1062,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
void __init dt_cpu_ftrs_scan(void)
{
+ if (!using_dt_cpu_ftrs)
+ return;
+
of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
}
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index bfbad08a1207..49d8422767b4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -57,7 +57,7 @@ system_call_common:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
extrdi. r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
- bne tabort_syscall
+ bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
andi. r10,r12,MSR_PR
@@ -143,6 +143,7 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
mtmsrd r11,1
#endif /* CONFIG_PPC_BOOK3E */
+system_call: /* label this so stack traces look sane */
/* We do need to set SOFTE in the stack frame or the return
* from interrupt will be painful
*/
@@ -152,11 +153,11 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
CURRENT_THREAD_INFO(r11, r1)
ld r10,TI_FLAGS(r11)
andi. r11,r10,_TIF_SYSCALL_DOTRACE
- bne syscall_dotrace /* does not return */
+ bne .Lsyscall_dotrace /* does not return */
cmpldi 0,r0,NR_syscalls
- bge- syscall_enosys
+ bge- .Lsyscall_enosys
-system_call: /* label this so stack traces look sane */
+.Lsyscall:
/*
* Need to vector to 32 Bit or default sys_call_table here,
* based on caller's run-mode / personality.
@@ -185,8 +186,20 @@ system_call: /* label this so stack traces look sane */
#ifdef CONFIG_PPC_BOOK3S
/* No MSR:RI on BookE */
andi. r10,r8,MSR_RI
- beq- unrecov_restore
+ beq- .Lunrecov_restore
#endif
+
+/*
+ * This is a few instructions into the actual syscall exit path (which actually
+ * starts at .Lsyscall_exit) to cater to kprobe blacklisting and to reduce the
+ * number of visible symbols for profiling purposes.
+ *
+ * We can probe from system_call until this point as MSR_RI is set. But once it
+ * is cleared below, we won't be able to take a trap.
+ *
+ * This is blacklisted from kprobes further below with _ASM_NOKPROBE_SYMBOL().
+ */
+system_call_exit:
/*
* Disable interrupts so current_thread_info()->flags can't change,
* and so that we don't get interrupted after loading SRR0/1.
@@ -208,31 +221,21 @@ system_call: /* label this so stack traces look sane */
ld r9,TI_FLAGS(r12)
li r11,-MAX_ERRNO
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- syscall_exit_work
+ bne- .Lsyscall_exit_work
- andi. r0,r8,MSR_FP
- beq 2f
+ /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
+ li r7,MSR_FP
#ifdef CONFIG_ALTIVEC
- andis. r0,r8,MSR_VEC@h
- bne 3f
-#endif
-2: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
- li r10,MSR_RI
- mtmsrd r10,1 /* Restore RI */
-#endif
- bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
- li r11,0
- mtmsrd r11,1
+ oris r7,r7,MSR_VEC@h
#endif
- ld r8,_MSR(r1)
- ld r3,RESULT(r1)
- li r11,-MAX_ERRNO
+ and r0,r8,r7
+ cmpd r0,r7
+ bne .Lsyscall_restore_math
+.Lsyscall_restore_math_cont:
-3: cmpld r3,r11
+ cmpld r3,r11
ld r5,_CCR(r1)
- bge- syscall_error
+ bge- .Lsyscall_error
.Lsyscall_error_cont:
ld r7,_NIP(r1)
BEGIN_FTR_SECTION
@@ -258,14 +261,48 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI
b . /* prevent speculative execution */
-syscall_error:
+.Lsyscall_error:
oris r5,r5,0x1000 /* Set SO bit in CR */
neg r3,r3
std r5,_CCR(r1)
b .Lsyscall_error_cont
-
+
+.Lsyscall_restore_math:
+ /*
+ * Some initial tests from restore_math to avoid the heavyweight
+ * C code entry and MSR manipulations.
+ */
+ LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
+ and. r0,r0,r8
+ bne 1f
+
+ ld r7,PACACURRENT(r13)
+ lbz r0,THREAD+THREAD_LOAD_FP(r7)
+#ifdef CONFIG_ALTIVEC
+ lbz r6,THREAD+THREAD_LOAD_VEC(r7)
+ add r0,r0,r6
+#endif
+ cmpdi r0,0
+ beq .Lsyscall_restore_math_cont
+
+1: addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Restore RI */
+#endif
+ bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,0
+ mtmsrd r11,1
+#endif
+ /* Restore volatiles, reload MSR from updated one */
+ ld r8,_MSR(r1)
+ ld r3,RESULT(r1)
+ li r11,-MAX_ERRNO
+ b .Lsyscall_restore_math_cont
+
/* Traced system call support */
-syscall_dotrace:
+.Lsyscall_dotrace:
bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_syscall_trace_enter
@@ -286,23 +323,23 @@ syscall_dotrace:
ld r7,GPR7(r1)
ld r8,GPR8(r1)
- /* Repopulate r9 and r10 for the system_call path */
+ /* Repopulate r9 and r10 for the syscall path */
addi r9,r1,STACK_FRAME_OVERHEAD
CURRENT_THREAD_INFO(r10, r1)
ld r10,TI_FLAGS(r10)
cmpldi r0,NR_syscalls
- blt+ system_call
+ blt+ .Lsyscall
/* Return code is already in r3 thanks to do_syscall_trace_enter() */
b .Lsyscall_exit
-syscall_enosys:
+.Lsyscall_enosys:
li r3,-ENOSYS
b .Lsyscall_exit
-syscall_exit_work:
+.Lsyscall_exit_work:
#ifdef CONFIG_PPC_BOOK3S
li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
@@ -362,7 +399,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
b ret_from_except
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-tabort_syscall:
+.Ltabort_syscall:
/* Firstly we need to enable TM in the kernel */
mfmsr r10
li r9, 1
@@ -388,6 +425,8 @@ tabort_syscall:
rfid
b . /* prevent speculative execution */
#endif
+_ASM_NOKPROBE_SYMBOL(system_call_common);
+_ASM_NOKPROBE_SYMBOL(system_call_exit);
/* Save non-volatile GPRs, if not already saved. */
_GLOBAL(save_nvgprs)
@@ -398,6 +437,7 @@ _GLOBAL(save_nvgprs)
clrrdi r0,r11,1
std r0,_TRAP(r1)
blr
+_ASM_NOKPROBE_SYMBOL(save_nvgprs);
/*
@@ -488,33 +528,30 @@ _GLOBAL(_switch)
std r23,_CCR(r1)
std r1,KSP(r3) /* Set old stack pointer */
-#ifdef CONFIG_SMP
- /* We need a sync somewhere here to make sure that if the
- * previous task gets rescheduled on another CPU, it sees all
- * stores it has performed on this one.
+ /*
+ * On SMP kernels, care must be taken because a task may be
+ * scheduled off CPUx and on to CPUy. Memory ordering must be
+ * considered.
+ *
+ * Cacheable stores on CPUx will be visible when the task is
+ * scheduled on CPUy by virtue of the core scheduler barriers
+ * (see "Notes on Program-Order guarantees on SMP systems." in
+ * kernel/sched/core.c).
+ *
+ * Uncacheable stores in the case of involuntary preemption must
+ * be taken care of. The smp_mb__before_spin_lock() in __schedule()
+ * is implemented as hwsync on powerpc, which orders MMIO too. So
+ * long as there is an hwsync in the context switch path, it will
+ * be executed on the source CPU after the task has performed
+ * all MMIO ops on that CPU, and on the destination CPU before the
+ * task performs any MMIO ops there.
*/
- sync
-#endif /* CONFIG_SMP */
/*
- * If we optimise away the clear of the reservation in system
- * calls because we know the CPU tracks the address of the
- * reservation, then we need to clear it here to cover the
- * case that the kernel context switch path has no larx
- * instructions.
+ * The kernel context switch path must contain a spin_lock,
+ * which contains larx/stcx, which will clear any reservation
+ * of the task being switched.
*/
-BEGIN_FTR_SECTION
- ldarx r6,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-BEGIN_FTR_SECTION
-/*
- * A cp_abort (copy paste abort) here ensures that when context switching, a
- * copy from one process can't leak into the paste of another.
- */
- PPC_CP_ABORT
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
#ifdef CONFIG_PPC_BOOK3S
/* Cancel all explict user streams as they will have no use after context
* switch and will stop the HW from creating streams itself
@@ -583,6 +620,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
top of the kernel stack. */
addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
+ /*
+ * PMU interrupts in radix may come in here. They will use r1, not
+ * PACAKSAVE, so this stack switch will not cause a problem. They
+ * will store to the process stack, which may then be migrated to
+ * another CPU. However the rq lock release on this CPU paired with
+ * the rq lock acquire on the new CPU before the stack becomes
+ * active on the new CPU, will order those stores.
+ */
mr r1,r8 /* start using new stack pointer */
std r7,PACAKSAVE(r13)
@@ -763,11 +808,11 @@ restore:
ld r5,SOFTE(r1)
lbz r6,PACASOFTIRQEN(r13)
cmpwi cr0,r5,0
- beq restore_irq_off
+ beq .Lrestore_irq_off
/* We are enabling, were we already enabled ? Yes, just return */
cmpwi cr0,r6,1
- beq cr0,do_restore
+ beq cr0,.Ldo_restore
/*
* We are about to soft-enable interrupts (we are hard disabled
@@ -776,14 +821,14 @@ restore:
*/
lbz r0,PACAIRQHAPPENED(r13)
cmpwi cr0,r0,0
- bne- restore_check_irq_replay
+ bne- .Lrestore_check_irq_replay
/*
* Get here when nothing happened while soft-disabled, just
* soft-enable and move-on. We will hard-enable as a side
* effect of rfi
*/
-restore_no_replay:
+.Lrestore_no_replay:
TRACE_ENABLE_INTS
li r0,1
stb r0,PACASOFTIRQEN(r13);
@@ -791,7 +836,7 @@ restore_no_replay:
/*
* Final return path. BookE is handled in a different file
*/
-do_restore:
+.Ldo_restore:
#ifdef CONFIG_PPC_BOOK3E
b exception_return_book3e
#else
@@ -825,7 +870,7 @@ fast_exception_return:
REST_8GPRS(5, r1)
andi. r0,r3,MSR_RI
- beq- unrecov_restore
+ beq- .Lunrecov_restore
/* Load PPR from thread struct before we clear MSR:RI */
BEGIN_FTR_SECTION
@@ -883,7 +928,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* make sure that in this case, we also clear PACA_IRQ_HARD_DIS
* or that bit can get out of sync and bad things will happen
*/
-restore_irq_off:
+.Lrestore_irq_off:
ld r3,_MSR(r1)
lbz r7,PACAIRQHAPPENED(r13)
andi. r0,r3,MSR_EE
@@ -893,13 +938,13 @@ restore_irq_off:
1: li r0,0
stb r0,PACASOFTIRQEN(r13);
TRACE_DISABLE_INTS
- b do_restore
+ b .Ldo_restore
/*
* Something did happen, check if a re-emit is needed
* (this also clears paca->irq_happened)
*/
-restore_check_irq_replay:
+.Lrestore_check_irq_replay:
/* XXX: We could implement a fast path here where we check
* for irq_happened being just 0x01, in which case we can
* clear it and return. That means that we would potentially
@@ -909,7 +954,7 @@ restore_check_irq_replay:
*/
bl __check_irq_replay
cmpwi cr0,r3,0
- beq restore_no_replay
+ beq .Lrestore_no_replay
/*
* We need to re-emit an interrupt. We do so by re-using our
@@ -958,10 +1003,18 @@ restore_check_irq_replay:
#endif /* CONFIG_PPC_DOORBELL */
1: b ret_from_except /* What else to do here ? */
-unrecov_restore:
+.Lunrecov_restore:
addi r3,r1,STACK_FRAME_OVERHEAD
bl unrecoverable_exception
- b unrecov_restore
+ b .Lunrecov_restore
+
+_ASM_NOKPROBE_SYMBOL(ret_from_except);
+_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
+_ASM_NOKPROBE_SYMBOL(resume_kernel);
+_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
+_ASM_NOKPROBE_SYMBOL(restore);
+_ASM_NOKPROBE_SYMBOL(fast_exception_return);
+
#ifdef CONFIG_PPC_RTAS
/*
@@ -1038,6 +1091,8 @@ _GLOBAL(enter_rtas)
rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
andc r6,r0,r9
+
+__enter_rtas:
sync /* disable interrupts so SRR0/1 */
mtmsrd r0 /* don't get trashed */
@@ -1074,6 +1129,8 @@ rtas_return_loc:
mtspr SPRN_SRR1,r4
rfid
b . /* prevent speculative execution */
+_ASM_NOKPROBE_SYMBOL(__enter_rtas)
+_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
.align 3
1: .llong rtas_restore_regs
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ae418b85c17c..4c18a5fbb4bb 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -99,7 +99,11 @@ EXC_VIRT_NONE(0x4000, 0x100)
#ifdef CONFIG_PPC_P7_NAP
/*
* If running native on arch 2.06 or later, check if we are waking up
- * from nap/sleep/winkle, and branch to idle handler.
+ * from nap/sleep/winkle, and branch to idle handler. This tests SRR1
+ * bits 46:47. A non-0 value indicates that we are coming from a power
+ * saving state. The idle wakeup handler initially runs in real mode,
+ * but we branch to the 0xc000... address so we can turn on relocation
+ * with mtmsr.
*/
#define IDLETEST(n) \
BEGIN_FTR_SECTION ; \
@@ -107,7 +111,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
rlwinm. r10,r10,47-31,30,31 ; \
beq- 1f ; \
cmpwi cr3,r10,2 ; \
- BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \
+ BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#else
@@ -128,6 +132,7 @@ EXC_VIRT_NONE(0x4100, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
+ mfspr r12,SPRN_SRR1
b pnv_powersave_wakeup
#endif
@@ -507,46 +512,22 @@ EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crset 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- /*
- * We can't just use a direct branch to slb_miss_realmode
- * because the distance from here to there depends on where
- * the kernel ends up being put.
- */
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crset 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- /*
- * We can't just use a direct branch to slb_miss_realmode
- * because the distance from here to there depends on where
- * the kernel ends up being put.
- */
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
@@ -575,88 +556,82 @@ EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crclr 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0(PACA_EXSLB)
EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
+ mr r12,r3 /* save r3 */
mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
+ mfspr r11,SPRN_SRR1
crclr 4*cr6+eq
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- mfctr r11
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
-#endif
+ BRANCH_TO_COMMON(r10, slb_miss_common)
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
TRAMP_KVM(PACA_EXSLB, 0x480)
-/* This handler is used by both 0x380 and 0x480 slb miss interrupts */
-EXC_COMMON_BEGIN(slb_miss_realmode)
+/*
+ * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
+ * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
+ */
+EXC_COMMON_BEGIN(slb_miss_common)
/*
* r13 points to the PACA, r9 contains the saved CR,
- * r12 contain the saved SRR1, SRR0 is still ready for return
+ * r12 contains the saved r3,
+ * r11 contain the saved SRR1, SRR0 is still ready for return
* r3 has the faulting address
* r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
* cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
* We assume we aren't going to take any exceptions during this
* procedure.
*/
mflr r10
-#ifdef CONFIG_RELOCATABLE
- mtctr r11
-#endif
-
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
- std r3,PACA_EXSLB+EX_DAR(r13)
+
+ /*
+ * Test MSR_RI before calling slb_allocate_realmode, because the
+ * MSR in r11 gets clobbered. However we still want to allocate
+ * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
+ * recursive SLB faults. So use cr5 for this, which is preserved.
+ */
+ andi. r11,r11,MSR_RI /* check for unrecoverable exception */
+ cmpdi cr5,r11,MSR_RI
crset 4*cr0+eq
#ifdef CONFIG_PPC_STD_MMU_64
BEGIN_MMU_FTR_SECTION
- bl slb_allocate_realmode
+ bl slb_allocate
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
#endif
ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
- beq 8f /* if bad address, make full stack frame */
+ beq- 8f /* if bad address, make full stack frame */
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- 2f
+ bne- cr5,2f /* if unrecoverable exception, oops */
/* All done -- return from exception. */
.machine push
.machine "power4"
mtcrf 0x80,r9
+ mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
.machine pop
+ RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
+ mr r3,r12
ld r9,PACA_EXSLB+EX_R9(r13)
ld r10,PACA_EXSLB+EX_R10(r13)
ld r11,PACA_EXSLB+EX_R11(r13)
@@ -665,7 +640,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
rfid
b . /* prevent speculative execution */
-2: mfspr r11,SPRN_SRR0
+2: std r3,PACA_EXSLB+EX_DAR(r13)
+ mr r3,r12
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
LOAD_HANDLER(r10,unrecov_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
@@ -673,7 +651,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
rfid
b .
-8: mfspr r11,SPRN_SRR0
+8: std r3,PACA_EXSLB+EX_DAR(r13)
+ mr r3,r12
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
LOAD_HANDLER(r10,bad_addr_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
@@ -821,46 +802,80 @@ EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
TRAMP_KVM(PACA_EXGEN, 0xb00)
EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+/*
+ * system call / hypercall (0xc00, 0x4c00)
+ *
+ * The system call exception is invoked with "sc 0" and does not alter HV bit.
+ * There is support for kernel code to invoke system calls but there are no
+ * in-tree users.
+ *
+ * The hypercall is invoked with "sc 1" and sets HV=1.
+ *
+ * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
+ * 0x4c00 virtual mode.
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ *
+ * For hypercalls, the register convention is as follows:
+ * r0 volatile
+ * r1-2 nonvolatile
+ * r3 volatile parameter and return value for status
+ * r4-r10 volatile input and output value
+ * r11 volatile hypercall number and output value
+ * r12 volatile
+ * r13-r31 nonvolatile
+ * LR nonvolatile
+ * CTR volatile
+ * XER volatile
+ * CR0-1 CR5-7 volatile
+ * CR2-4 nonvolatile
+ * Other registers nonvolatile
+ *
+ * The intersection of volatile registers that don't contain possible
+ * inputs is: r12, cr0, xer, ctr. We may use these as scratch regs
+ * upon entry without saving.
+ */
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
- /*
- * If CONFIG_KVM_BOOK3S_64_HANDLER is set, save the PPR (on systems
- * that support it) before changing to HMT_MEDIUM. That allows the KVM
- * code to save that value into the guest state (it is the guest's PPR
- * value). Otherwise just change to HMT_MEDIUM as userspace has
- * already saved the PPR.
- */
+ /*
+ * There is a little bit of juggling to get syscall and hcall
+ * working well. Save r10 in ctr to be restored in case it is a
+ * hcall.
+ *
+ * Userspace syscalls have already saved the PPR, hcalls must save
+ * it before setting HMT_MEDIUM.
+ */
#define SYSCALL_KVMTEST \
- SET_SCRATCH0(r13); \
+ mr r12,r13; \
GET_PACA(r13); \
- std r9,PACA_EXGEN+EX_R9(r13); \
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
+ mtctr r10; \
+ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
HMT_MEDIUM; \
- std r10,PACA_EXGEN+EX_R10(r13); \
- OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r9, CPU_FTR_HAS_PPR); \
- mfcr r9; \
- KVMTEST_PR(0xc00); \
- GET_SCRATCH0(r13)
+ mr r9,r12; \
#else
#define SYSCALL_KVMTEST \
- HMT_MEDIUM
+ HMT_MEDIUM; \
+ mr r9,r13; \
+ GET_PACA(r13);
#endif
#define LOAD_SYSCALL_HANDLER(reg) \
__LOAD_HANDLER(reg, system_call_common)
-/* Syscall routine is used twice, in reloc-off and reloc-on paths */
-#define SYSCALL_PSERIES_1 \
+#define SYSCALL_FASTENDIAN_TEST \
BEGIN_FTR_SECTION \
cmpdi r0,0x1ebe ; \
beq- 1f ; \
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
- mr r9,r13 ; \
- GET_PACA(r13) ; \
- mfspr r11,SPRN_SRR0 ; \
-0:
-#define SYSCALL_PSERIES_2_RFID \
+/*
+ * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
+ * and HMT_MEDIUM.
+ */
+#define SYSCALL_REAL \
+ mfspr r11,SPRN_SRR0 ; \
mfspr r12,SPRN_SRR1 ; \
LOAD_SYSCALL_HANDLER(r10) ; \
mtspr SPRN_SRR0,r10 ; \
@@ -869,11 +884,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
rfid ; \
b . ; /* prevent speculative execution */
-#define SYSCALL_PSERIES_3 \
+#define SYSCALL_FASTENDIAN \
/* Fast LE/BE switch system call */ \
1: mfspr r12,SPRN_SRR1 ; \
xori r12,r12,MSR_LE ; \
mtspr SPRN_SRR1,r12 ; \
+ mr r13,r9 ; \
rfid ; /* return to userspace */ \
b . ; /* prevent speculative execution */
@@ -882,16 +898,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
* We can't branch directly so we do it via the CTR which
* is volatile across system calls.
*/
-#define SYSCALL_PSERIES_2_DIRECT \
- LOAD_SYSCALL_HANDLER(r12) ; \
- mtctr r12 ; \
+#define SYSCALL_VIRT \
+ LOAD_SYSCALL_HANDLER(r10) ; \
+ mtctr r10 ; \
+ mfspr r11,SPRN_SRR0 ; \
mfspr r12,SPRN_SRR1 ; \
li r10,MSR_RI ; \
mtmsrd r10,1 ; \
bctr ;
#else
/* We can branch directly */
-#define SYSCALL_PSERIES_2_DIRECT \
+#define SYSCALL_VIRT \
+ mfspr r11,SPRN_SRR0 ; \
mfspr r12,SPRN_SRR1 ; \
li r10,MSR_RI ; \
mtmsrd r10,1 ; /* Set RI (EE=0) */ \
@@ -899,20 +917,43 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
#endif
EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
- SYSCALL_KVMTEST
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_RFID
- SYSCALL_PSERIES_3
+ SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
+ SYSCALL_FASTENDIAN_TEST
+ SYSCALL_REAL
+ SYSCALL_FASTENDIAN
EXC_REAL_END(system_call, 0xc00, 0x100)
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
- SYSCALL_KVMTEST
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_DIRECT
- SYSCALL_PSERIES_3
+ SYSCALL_KVMTEST /* loads PACA into r13, and saves r13 to r9 */
+ SYSCALL_FASTENDIAN_TEST
+ SYSCALL_VIRT
+ SYSCALL_FASTENDIAN
EXC_VIRT_END(system_call, 0x4c00, 0x100)
-TRAMP_KVM(PACA_EXGEN, 0xc00)
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ /*
+ * This is a hcall, so register convention is as above, with these
+ * differences:
+ * r13 = PACA
+ * r12 = orig r13
+ * ctr = orig r10
+ */
+TRAMP_KVM_BEGIN(do_kvm_0xc00)
+ /*
+ * Save the PPR (on systems that support it) before changing to
+ * HMT_MEDIUM. That allows the KVM code to save that value into the
+ * guest state (it is the guest's PPR value).
+ */
+ OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR)
+ mfctr r10
+ SET_SCRATCH0(r12)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ mfcr r9
+ std r10,PACA_EXGEN+EX_R10(r13)
+ KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
+#endif
EXC_REAL(single_step, 0xd00, 0x100)
@@ -1411,10 +1452,8 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
- andis. r0,r4,0xa410 /* weird error? */
+ andis. r0,r4,0xa450 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
- andis. r0,r4,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
@@ -1438,11 +1477,16 @@ do_hash_page:
/* Error */
blt- 13f
+
+ /* Reload DSISR into r4 for the DABR check below */
+ ld r4,_DSISR(r1)
#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: ld r4,_DAR(r1)
+11: andis. r0,r4,DSISR_DABRMATCH@h
+ bne- handle_dabr_fault
+ ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
@@ -1550,6 +1594,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
1: addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_bad_stack
b 1b
+_ASM_NOKPROBE_SYMBOL(bad_stack);
+
+/*
+ * When doorbell is triggered from system reset wakeup, the message is
+ * not cleared, so it would fire again when EE is enabled.
+ *
+ * When coming from local_irq_enable, there may be the same problem if
+ * we were hard disabled.
+ *
+ * Execute msgclr to clear pending exceptions before handling it.
+ */
+h_doorbell_common_msgclr:
+ LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
+ PPC_MSGCLR(3)
+ b h_doorbell_common
+
+doorbell_super_common_msgclr:
+ LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36))
+ PPC_MSGCLRP(3)
+ b doorbell_super_common
/*
* Called from arch_local_irq_enable when an interrupt needs
@@ -1560,6 +1624,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
* Note: While MSR:EE is off, we need to make sure that _MSR
* in the generated frame has EE set to 1 or the exception
* handler will not properly re-enable them.
+ *
+ * Note that we don't specify LR as the NIP (return address) for
+ * the interrupt because that would unbalance the return branch
+ * predictor.
*/
_GLOBAL(__replay_interrupt)
/* We are going to jump to the exception common code which
@@ -1567,7 +1635,7 @@ _GLOBAL(__replay_interrupt)
* we don't give a damn about, so we don't bother storing them.
*/
mfmsr r12
- mflr r11
+ LOAD_REG_ADDR(r11, 1f)
mfcr r9
ori r12,r12,MSR_EE
cmpwi r3,0x900
@@ -1576,13 +1644,16 @@ _GLOBAL(__replay_interrupt)
beq hardware_interrupt_common
BEGIN_FTR_SECTION
cmpwi r3,0xe80
- beq h_doorbell_common
+ beq h_doorbell_common_msgclr
cmpwi r3,0xea0
beq h_virt_irq_common
cmpwi r3,0xe60
beq hmi_exception_common
FTR_SECTION_ELSE
cmpwi r3,0xa00
- beq doorbell_super_common
+ beq doorbell_super_common_msgclr
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+1:
blr
+
+_ASM_NOKPROBE_SYMBOL(__replay_interrupt)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 466569e26278..3079518f2245 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -113,11 +113,55 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
return 1;
}
+/*
+ * If fadump is registered, check if the memory provided
+ * falls within boot memory area.
+ */
+int is_fadump_boot_memory_area(u64 addr, ulong size)
+{
+ if (!fw_dump.dump_registered)
+ return 0;
+
+ return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+}
+
int is_fadump_active(void)
{
return fw_dump.dump_active;
}
+/*
+ * Returns 1, if there are no holes in boot memory area,
+ * 0 otherwise.
+ */
+static int is_boot_memory_area_contiguous(void)
+{
+ struct memblock_region *reg;
+ unsigned long tstart, tend;
+ unsigned long start_pfn = PHYS_PFN(RMA_START);
+ unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
+ unsigned int ret = 0;
+
+ for_each_memblock(memory, reg) {
+ tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
+ tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ if (tstart < tend) {
+ /* Memory hole from start_pfn to tstart */
+ if (tstart > start_pfn)
+ break;
+
+ if (tend == end_pfn) {
+ ret = 1;
+ break;
+ }
+
+ start_pfn = tend + 1;
+ }
+ }
+
+ return ret;
+}
+
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
@@ -212,20 +256,46 @@ static inline unsigned long fadump_calculate_reserve_size(void)
int ret;
unsigned long long base, size;
+ if (fw_dump.reserve_bootvar)
+ pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
+
/*
* Check if the size is specified through crashkernel= cmdline
- * option. If yes, then use that but ignore base as fadump
- * reserves memory at end of RAM.
+ * option. If yes, then use that but ignore base as fadump reserves
+ * memory at a predefined offset.
*/
ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
&size, &base);
if (ret == 0 && size > 0) {
+ unsigned long max_size;
+
+ if (fw_dump.reserve_bootvar)
+ pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
+
fw_dump.reserve_bootvar = (unsigned long)size;
+
+ /*
+ * Adjust if the boot memory size specified is above
+ * the upper limit.
+ */
+ max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
+ if (fw_dump.reserve_bootvar > max_size) {
+ fw_dump.reserve_bootvar = max_size;
+ pr_info("Adjusted boot memory size to %luMB\n",
+ (fw_dump.reserve_bootvar >> 20));
+ }
+
+ return fw_dump.reserve_bootvar;
+ } else if (fw_dump.reserve_bootvar) {
+ /*
+ * 'fadump_reserve_mem=' is being used to reserve memory
+ * for firmware-assisted dump.
+ */
return fw_dump.reserve_bootvar;
}
/* divide by 20 to get 5% of value */
- size = memblock_end_of_DRAM() / 20;
+ size = memblock_phys_mem_size() / 20;
/* round it down in multiples of 256 */
size = size & ~0x0FFFFFFFUL;
@@ -377,9 +447,22 @@ static int __init early_fadump_param(char *p)
}
early_param("fadump", early_fadump_param);
-static void register_fw_dump(struct fadump_mem_struct *fdm)
+/*
+ * Look for fadump_reserve_mem= cmdline option
+ * TODO: Remove references to 'fadump_reserve_mem=' parameter,
+ * the sooner 'crashkernel=' parameter is accustomed to.
+ */
+static int __init early_fadump_reserve_mem(char *p)
+{
+ if (p)
+ fw_dump.reserve_bootvar = memparse(p, &p);
+ return 0;
+}
+early_param("fadump_reserve_mem", early_fadump_reserve_mem);
+
+static int register_fw_dump(struct fadump_mem_struct *fdm)
{
- int rc;
+ int rc, err;
unsigned int wait_time;
pr_debug("Registering for firmware-assisted kernel dump...\n");
@@ -396,26 +479,38 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
} while (wait_time);
+ err = -EIO;
switch (rc) {
+ default:
+ pr_err("Failed to register. Unknown Error(%d).\n", rc);
+ break;
case -1:
printk(KERN_ERR "Failed to register firmware-assisted kernel"
" dump. Hardware Error(%d).\n", rc);
break;
case -3:
+ if (!is_boot_memory_area_contiguous())
+ pr_err("Can't have holes in boot memory area while "
+ "registering fadump\n");
+
printk(KERN_ERR "Failed to register firmware-assisted kernel"
" dump. Parameter Error(%d).\n", rc);
+ err = -EINVAL;
break;
case -9:
printk(KERN_ERR "firmware-assisted kernel dump is already "
" registered.");
fw_dump.dump_registered = 1;
+ err = -EEXIST;
break;
case 0:
printk(KERN_INFO "firmware-assisted kernel dump registration"
" is successful\n");
fw_dump.dump_registered = 1;
+ err = 0;
break;
}
+ return err;
}
void crash_fadump(struct pt_regs *regs, const char *str)
@@ -831,8 +926,19 @@ static void fadump_setup_crash_memory_ranges(void)
for_each_memblock(memory, reg) {
start = (unsigned long long)reg->base;
end = start + (unsigned long long)reg->size;
- if (start == RMA_START && end >= fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+
+ /*
+ * skip the first memory chunk that is already added (RMA_START
+ * through boot_memory_size). This logic needs a relook if and
+ * when RMA_START changes to a non-zero value.
+ */
+ BUILD_BUG_ON(RMA_START != 0);
+ if (start < fw_dump.boot_memory_size) {
+ if (end > fw_dump.boot_memory_size)
+ start = fw_dump.boot_memory_size;
+ else
+ continue;
+ }
/* add this range excluding the reserved dump area. */
fadump_exclude_reserved_area(start, end);
@@ -956,7 +1062,7 @@ static unsigned long init_fadump_header(unsigned long addr)
return addr;
}
-static void register_fadump(void)
+static int register_fadump(void)
{
unsigned long addr;
void *vaddr;
@@ -966,7 +1072,7 @@ static void register_fadump(void)
* assisted dump.
*/
if (!fw_dump.reserve_dump_area_size)
- return;
+ return -ENODEV;
fadump_setup_crash_memory_ranges();
@@ -979,7 +1085,7 @@ static void register_fadump(void)
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- register_fw_dump(&fdm);
+ return register_fw_dump(&fdm);
}
static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
@@ -1046,28 +1152,71 @@ void fadump_cleanup(void)
}
}
+static void fadump_free_reserved_memory(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long pfn;
+ unsigned long time_limit = jiffies + HZ;
+
+ pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
+ PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ free_reserved_page(pfn_to_page(pfn));
+
+ if (time_after(jiffies, time_limit)) {
+ cond_resched();
+ time_limit = jiffies + HZ;
+ }
+ }
+}
+
+/*
+ * Skip memory holes and free memory that was actually reserved.
+ */
+static void fadump_release_reserved_area(unsigned long start, unsigned long end)
+{
+ struct memblock_region *reg;
+ unsigned long tstart, tend;
+ unsigned long start_pfn = PHYS_PFN(start);
+ unsigned long end_pfn = PHYS_PFN(end);
+
+ for_each_memblock(memory, reg) {
+ tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
+ tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ if (tstart < tend) {
+ fadump_free_reserved_memory(tstart, tend);
+
+ if (tend == end_pfn)
+ break;
+
+ start_pfn = tend + 1;
+ }
+ }
+}
+
/*
* Release the memory that was reserved in early boot to preserve the memory
* contents. The released memory will be available for general use.
*/
static void fadump_release_memory(unsigned long begin, unsigned long end)
{
- unsigned long addr;
unsigned long ra_start, ra_end;
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- /*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
- */
- if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
- continue;
-
- free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
- }
+ /*
+ * exclude the dump reserve area. Will reuse it for next
+ * fadump registration.
+ */
+ if (begin < ra_end && end > ra_start) {
+ if (begin < ra_start)
+ fadump_release_reserved_area(begin, ra_start);
+ if (end > ra_end)
+ fadump_release_reserved_area(ra_end, end);
+ } else
+ fadump_release_reserved_area(begin, end);
}
static void fadump_invalidate_release_mem(void)
@@ -1161,7 +1310,6 @@ static ssize_t fadump_register_store(struct kobject *kobj,
switch (buf[0]) {
case '0':
if (fw_dump.dump_registered == 0) {
- ret = -EINVAL;
goto unlock_out;
}
/* Un-register Firmware-assisted dump */
@@ -1169,11 +1317,11 @@ static ssize_t fadump_register_store(struct kobject *kobj,
break;
case '1':
if (fw_dump.dump_registered == 1) {
- ret = -EINVAL;
+ ret = -EEXIST;
goto unlock_out;
}
/* Register Firmware-assisted dump */
- register_fadump();
+ ret = register_fadump();
break;
default:
ret = -EINVAL;
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 4898d676dcae..5adb390e773b 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -31,6 +31,7 @@
* registers for winkle support.
*/
#define _SDR1 GPR3
+#define _PTCR GPR3
#define _RPR GPR4
#define _SPURR GPR5
#define _PURR GPR6
@@ -39,7 +40,7 @@
#define _AMOR GPR9
#define _WORT GPR10
#define _WORC GPR11
-#define _PTCR GPR12
+#define _LPCR GPR12
#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16
@@ -55,12 +56,14 @@ save_sprs_to_stack:
* here since any thread in the core might wake up first
*/
BEGIN_FTR_SECTION
- mfspr r3,SPRN_PTCR
- std r3,_PTCR(r1)
/*
* Note - SDR1 is dropped in Power ISA v3. Hence not restoring
* SDR1 here
*/
+ mfspr r3,SPRN_PTCR
+ std r3,_PTCR(r1)
+ mfspr r3,SPRN_LPCR
+ std r3,_LPCR(r1)
FTR_SECTION_ELSE
mfspr r3,SPRN_SDR1
std r3,_SDR1(r1)
@@ -106,13 +109,9 @@ core_idle_lock_held:
/*
* Pass requested state in r3:
* r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- * - Requested STOP state in POWER9
- *
- * To check IRQ_HAPPENED in r4
- * 0 - don't check
- * 1 - check
+ * - Requested PSSCR value in POWER9
*
- * Address to 'rfid' to in r5
+ * Address of idle handler to branch to in realmode in r4
*/
pnv_powersave_common:
/* Use r3 to pass state nap/sleep/winkle */
@@ -122,37 +121,14 @@ pnv_powersave_common:
* need to save PC, some CR bits and the NV GPRs,
* but for now an interrupt frame will do.
*/
+ mtctr r4
+
mflr r0
std r0,16(r1)
stdu r1,-INT_FRAME_SIZE(r1)
std r0,_LINK(r1)
std r0,_NIP(r1)
- /* Hard disable interrupts */
- mfmsr r9
- rldicl r9,r9,48,1
- rotldi r9,r9,16
- mtmsrd r9,1 /* hard-disable interrupts */
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- andi. r0,r0,~PACA_IRQ_HARD_DIS@l
- beq 1f
- cmpwi cr0,r4,0
- beq 1f
- addi r1,r1,INT_FRAME_SIZE
- ld r0,16(r1)
- li r3,0 /* Return 0 (no nap) */
- mtlr r0
- blr
-
-1: /* We mark irqs hard disabled as this is the state we'll
- * be in when returning and we need to tell arch_local_irq_restore()
- * about it
- */
- li r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
-
/* We haven't lost state ... yet */
li r0,0
stb r0,PACA_NAPSTATELOST(r13)
@@ -160,9 +136,8 @@ pnv_powersave_common:
/* Continue saving state */
SAVE_GPR(2, r1)
SAVE_NVGPRS(r1)
- mfcr r4
- std r4,_CCR(r1)
- std r9,_MSR(r1)
+ mfcr r5
+ std r5,_CCR(r1)
std r1,PACAR1(r13)
/*
@@ -172,12 +147,8 @@ pnv_powersave_common:
* the MMU context to the guest.
*/
LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
- li r6, MSR_RI
- andc r6, r9, r6
- mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
- mtspr SPRN_SRR0, r5
- mtspr SPRN_SRR1, r7
- rfid
+ mtmsrd r7,0
+ bctr
.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
@@ -285,6 +256,19 @@ power_enter_stop:
bne .Lhandle_esl_ec_set
IDLE_STATE_ENTER_SEQ(PPC_STOP)
li r3,0 /* Since we didn't lose state, return 0 */
+
+ /*
+ * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
+ * it can determine if the wakeup reason is an HMI in
+ * CHECK_HMI_INTERRUPT.
+ *
+ * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
+ * reason, so there is no point setting r12 to SRR1.
+ *
+ * Further, we clear r12 here, so that we don't accidentally enter the
+ * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
+ */
+ li r12, 0
b pnv_wakeup_noloss
.Lhandle_esl_ec_set:
@@ -319,45 +303,23 @@ lwarx_loop_stop:
IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP)
-_GLOBAL(power7_idle)
+/*
+ * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
+ * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
+ */
+_GLOBAL(power7_idle_insn)
/* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
- li r3, 1
- /* fall through */
-
-_GLOBAL(power7_nap)
- mr r4,r3
- li r3,PNV_THREAD_NAP
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
- /* No return */
-
-_GLOBAL(power7_sleep)
- li r3,PNV_THREAD_SLEEP
- li r4,1
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
- /* No return */
-
-_GLOBAL(power7_winkle)
- li r3,PNV_THREAD_WINKLE
- li r4,1
- LOAD_REG_ADDR(r5, pnv_enter_arch207_idle_mode)
+ LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
b pnv_powersave_common
- /* No return */
#define CHECK_HMI_INTERRUPT \
- mfspr r0,SPRN_SRR1; \
BEGIN_FTR_SECTION_NESTED(66); \
- rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \
+ rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \
FTR_SECTION_ELSE_NESTED(66); \
- rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \
+ rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \
ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
- bne 20f; \
+ bne+ 20f; \
/* Invoke opal call to handle hmi */ \
ld r2,PACATOC(r13); \
ld r1,PACAR1(r13); \
@@ -369,16 +331,13 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
20: nop;
/*
- * r3 - The PSSCR value corresponding to the stop state.
- * r4 - The PSSCR mask corrresonding to the stop state.
+ * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
+ * r3 contains desired PSSCR register value.
*/
_GLOBAL(power9_idle_stop)
- mfspr r5,SPRN_PSSCR
- andc r5,r5,r4
- or r3,r3,r5
+ std r3, PACA_REQ_PSSCR(r13)
mtspr SPRN_PSSCR,r3
- LOAD_REG_ADDR(r5,power_enter_stop)
- li r4,1
+ LOAD_REG_ADDR(r4,power_enter_stop)
b pnv_powersave_common
/* No return */
@@ -436,17 +395,17 @@ pnv_powersave_wakeup_mce:
/*
* Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
- * reason into SRR1, which allows reuse of the system reset wakeup
+ * reason into r12, which allows reuse of the system reset wakeup
* code without being mistaken for another type of wakeup.
*/
- oris r3,r3,SRR1_WAKEMCE_RESVD@h
- mtspr SPRN_SRR1,r3
+ oris r12,r3,SRR1_WAKEMCE_RESVD@h
b pnv_powersave_wakeup
/*
* Called from reset vector for powersave wakeups.
* cr3 - set to gt if waking up with partial/complete hypervisor state loss
+ * r12 - SRR1
*/
.global pnv_powersave_wakeup
pnv_powersave_wakeup:
@@ -464,6 +423,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
li r0,PNV_THREAD_RUNNING
stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
+ mr r3,r12
+
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
@@ -477,7 +438,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
#endif
/* Return SRR1 from power7_nap() */
- mfspr r3,SPRN_SRR1
blt cr3,pnv_wakeup_noloss
b pnv_wakeup_loss
@@ -489,18 +449,35 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
*/
pnv_restore_hyp_resource_arch300:
/*
+ * Workaround for POWER9, if we lost resources, the ERAT
+ * might have been mixed up and needs flushing.
+ */
+ blt cr3,1f
+ PPC_INVALIDATE_ERAT
+1:
+ /*
* POWER ISA 3. Use PSSCR to determine if we
* are waking up from deep idle state
*/
LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
- mfspr r5,SPRN_PSSCR
+BEGIN_FTR_SECTION_NESTED(71)
+ /*
+ * Assume that we are waking up from the state
+ * same as the Requested Level (RL) in the PSSCR
+ * which are Bits 60-63
+ */
+ ld r5,PACA_REQ_PSSCR(r13)
+ rldicl r5,r5,0,60
+FTR_SECTION_ELSE_NESTED(71)
/*
* 0-3 bits correspond to Power-Saving Level Status
* which indicates the idle state we are waking up from
*/
+ mfspr r5, SPRN_PSSCR
rldicl r5,r5,4,60
+ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
cmpd cr4,r5,r4
bge cr4,pnv_wakeup_tb_loss /* returns to caller */
@@ -567,9 +544,9 @@ pnv_wakeup_tb_loss:
* is required to return back to reset vector after hypervisor state
* restore is complete.
*/
+ mr r19,r12
mr r18,r4
mflr r17
- mfspr r16,SPRN_SRR1
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
@@ -731,13 +708,14 @@ timebase_resync:
* Use cr3 which indicates that we are waking up with atleast partial
* hypervisor state loss to determine if TIMEBASE RESYNC is needed.
*/
- ble cr3,clear_lock
+ ble cr3,.Ltb_resynced
/* Time base re-sync */
bl opal_resync_timebase;
/*
- * If waking up from sleep, per core state is not lost, skip to
- * clear_lock.
+ * If waking up from sleep (POWER8), per core state
+ * is not lost, skip to clear_lock.
*/
+.Ltb_resynced:
blt cr4,clear_lock
/*
@@ -812,9 +790,13 @@ no_segments:
mtctr r12
bctrl
+BEGIN_FTR_SECTION
+ ld r4,_LPCR(r1)
+ mtspr SPRN_LPCR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
hypervisor_state_restored:
- mtspr SPRN_SRR1,r16
+ mr r12,r19
mtlr r17
blr /* return to pnv_powersave_wakeup */
@@ -827,6 +809,7 @@ fastsleep_workaround_at_exit:
/*
* R3 here contains the value that will be returned to the caller
* of power7_nap.
+ * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
*/
.global pnv_wakeup_loss
pnv_wakeup_loss:
@@ -836,32 +819,33 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
REST_NVGPRS(r1)
REST_GPR(2, r1)
+ ld r4,PACAKMSR(r13)
+ ld r5,_LINK(r1)
ld r6,_CCR(r1)
- ld r4,_MSR(r1)
- ld r5,_NIP(r1)
addi r1,r1,INT_FRAME_SIZE
+ mtlr r5
mtcr r6
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
+ mtmsrd r4
+ blr
/*
* R3 here contains the value that will be returned to the caller
* of power7_nap.
+ * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
*/
pnv_wakeup_noloss:
lbz r0,PACA_NAPSTATELOST(r13)
cmpwi r0,0
bne pnv_wakeup_loss
+ ld r1,PACAR1(r13)
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ld r1,PACAR1(r13)
- ld r6,_CCR(r1)
- ld r4,_MSR(r1)
+ ld r4,PACAKMSR(r13)
ld r5,_NIP(r1)
+ ld r6,_CCR(r1)
addi r1,r1,INT_FRAME_SIZE
+ mtlr r5
mtcr r6
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
+ mtmsrd r4
+ blr
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index f2b724cd9e64..233ca3fe4754 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -198,11 +198,11 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
- return DMA_ERROR_CODE;
+ return IOMMU_MAPPING_ERROR;
}
if (should_fail_iommu(dev))
- return DMA_ERROR_CODE;
+ return IOMMU_MAPPING_ERROR;
/*
* We don't need to disable preemption here because any CPU can
@@ -278,7 +278,7 @@ again:
} else {
/* Give up */
spin_unlock_irqrestore(&(pool->lock), flags);
- return DMA_ERROR_CODE;
+ return IOMMU_MAPPING_ERROR;
}
}
@@ -310,13 +310,13 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
unsigned long attrs)
{
unsigned long entry;
- dma_addr_t ret = DMA_ERROR_CODE;
+ dma_addr_t ret = IOMMU_MAPPING_ERROR;
int build_fail;
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == DMA_ERROR_CODE))
- return DMA_ERROR_CODE;
+ if (unlikely(entry == IOMMU_MAPPING_ERROR))
+ return IOMMU_MAPPING_ERROR;
entry += tbl->it_offset; /* Offset into real TCE table */
ret = entry << tbl->it_page_shift; /* Set the return dma address */
@@ -328,12 +328,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
/* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
- * DMA_ERROR_CODE. For all other errors the functionality is
+ * IOMMU_MAPPING_ERROR. For all other errors the functionality is
* not altered.
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
- return DMA_ERROR_CODE;
+ return IOMMU_MAPPING_ERROR;
}
/* Flush/invalidate TLB caches if necessary */
@@ -478,7 +478,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
/* Handle failure */
- if (unlikely(entry == DMA_ERROR_CODE)) {
+ if (unlikely(entry == IOMMU_MAPPING_ERROR)) {
if (!(attrs & DMA_ATTR_NO_WARN) &&
printk_ratelimit())
dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -545,7 +545,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
*/
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = DMA_ERROR_CODE;
+ outs->dma_address = IOMMU_MAPPING_ERROR;
outs->dma_length = 0;
}
@@ -563,7 +563,7 @@ int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages = iommu_num_pages(s->dma_address, s->dma_length,
IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
- s->dma_address = DMA_ERROR_CODE;
+ s->dma_address = IOMMU_MAPPING_ERROR;
s->dma_length = 0;
}
if (s == outs)
@@ -777,7 +777,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
unsigned long mask, enum dma_data_direction direction,
unsigned long attrs)
{
- dma_addr_t dma_handle = DMA_ERROR_CODE;
+ dma_addr_t dma_handle = IOMMU_MAPPING_ERROR;
void *vaddr;
unsigned long uaddr;
unsigned int npages, align;
@@ -797,7 +797,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
mask >> tbl->it_page_shift, align,
attrs);
- if (dma_handle == DMA_ERROR_CODE) {
+ if (dma_handle == IOMMU_MAPPING_ERROR) {
if (!(attrs & DMA_ATTR_NO_WARN) &&
printk_ratelimit()) {
dev_info(dev, "iommu_alloc failed, tbl %p "
@@ -869,7 +869,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
mask >> tbl->it_page_shift, io_order, 0);
- if (mapping == DMA_ERROR_CODE) {
+ if (mapping == IOMMU_MAPPING_ERROR) {
free_pages((unsigned long)ret, order);
return NULL;
}
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5c291df30fe3..0bcec745a672 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -322,7 +322,8 @@ bool prep_irq_for_idle(void)
* First we need to hard disable to ensure no interrupt
* occurs before we effectively enter the low power state
*/
- hard_irq_disable();
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
/*
* If anything happened while we were soft-disabled,
@@ -347,6 +348,65 @@ bool prep_irq_for_idle(void)
return true;
}
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * This is for idle sequences that return with IRQs off, but the
+ * idle state itself wakes on interrupt. Tell the irq tracer that
+ * IRQs are enabled for the duration of idle so it does not get long
+ * off times. Must be paired with fini_irq_for_idle_irqsoff.
+ */
+bool prep_irq_for_idle_irqsoff(void)
+{
+ WARN_ON(!irqs_disabled());
+
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /* Tell lockdep we are about to re-enable */
+ trace_hardirqs_on();
+
+ return true;
+}
+
+/*
+ * Take the SRR1 wakeup reason, index into this table to find the
+ * appropriate irq_happened bit.
+ */
+static const u8 srr1_to_lazyirq[0x10] = {
+ 0, 0, 0,
+ PACA_IRQ_DBELL,
+ 0,
+ PACA_IRQ_DBELL,
+ PACA_IRQ_DEC,
+ 0,
+ PACA_IRQ_EE,
+ PACA_IRQ_EE,
+ PACA_IRQ_HMI,
+ 0, 0, 0, 0, 0 };
+
+void irq_set_pending_from_srr1(unsigned long srr1)
+{
+ unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
+
+ /*
+ * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
+ * so this can be called unconditionally with srr1 wake reason.
+ */
+ local_paca->irq_happened |= srr1_to_lazyirq[idx];
+}
+#endif /* CONFIG_PPC_BOOK3S */
+
/*
* Force a replay of the external interrupt handler on this CPU.
*/
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fc4343514bed..45f1ff721c32 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+int is_current_kprobe_addr(unsigned long addr)
+{
+ struct kprobe *p = kprobe_running();
+ return (p && (unsigned long)p->addr == addr) ? 1 : 0;
+}
+
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@@ -158,17 +164,13 @@ NOKPROBE_SYMBOL(arch_prepare_kprobe);
void arch_arm_kprobe(struct kprobe *p)
{
- *p->addr = BREAKPOINT_INSTRUCTION;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ patch_instruction(p->addr, BREAKPOINT_INSTRUCTION);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
void arch_disarm_kprobe(struct kprobe *p)
{
- *p->addr = p->opcode;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ patch_instruction(p->addr, p->opcode);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
@@ -617,6 +619,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
+ /*
+ * jprobes use jprobe_return() which skips the normal return
+ * path of the function, and this messes up the accounting of the
+ * function graph tracer.
+ *
+ * Pause function graph tracing while performing the jprobe function.
+ */
+ pause_graph_tracing();
+
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
@@ -642,6 +653,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
* saved regs...
*/
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
+ /* It's OK to start function graph tracing again */
+ unpause_graph_tracing();
preempt_enable_no_resched();
return 1;
}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 5f9eada3519b..e0e131e662ed 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -268,6 +268,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
static const char *mc_ra_types[] = {
"Indeterminate",
"Instruction fetch (bad)",
+ "Instruction fetch (foreign)",
"Page table walk ifetch (bad)",
"Page table walk ifetch (foreign)",
"Load (bad)",
@@ -405,6 +406,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
}
+EXPORT_SYMBOL_GPL(machine_check_print_event_info);
uint64_t get_mce_fault_addr(struct machine_check_event *evt)
{
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index f913139bb0c2..d24e689e893f 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -236,6 +236,9 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000180000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
{ 0x00000000081c0000, 0x0000000008000000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 84db14e435f5..3f7a9a2d2435 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -244,8 +244,7 @@ _GLOBAL(_nmask_and_or_msr)
*/
_GLOBAL(real_readb)
mfmsr r7
- ori r0,r7,MSR_DR
- xori r0,r0,MSR_DR
+ rlwinm r0,r7,0,~MSR_DR
sync
mtmsr r0
sync
@@ -262,8 +261,7 @@ _GLOBAL(real_readb)
*/
_GLOBAL(real_writeb)
mfmsr r7
- ori r0,r7,MSR_DR
- xori r0,r0,MSR_DR
+ rlwinm r0,r7,0,~MSR_DR
sync
mtmsr r0
sync
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index eae61b044e9e..496d6393bd41 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -792,21 +792,17 @@ static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
count = min_t(size_t, count, size - *ppos);
count = min(count, PAGE_SIZE);
- ret = -ENOMEM;
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp)
- goto out;
-
- ret = -EFAULT;
- if (copy_from_user(tmp, buf, count))
+ tmp = memdup_user(buf, count);
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
goto out;
+ }
ret = ppc_md.nvram_write(tmp, count, ppos);
-out:
kfree(tmp);
+out:
return ret;
-
}
static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index ec60ed0d4aad..6f8273f5e988 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -158,12 +158,13 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
{
/* addis r4,0,(insn)@h */
- *addr++ = PPC_INST_ADDIS | ___PPC_RT(4) |
- ((val >> 16) & 0xffff);
+ patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(4) |
+ ((val >> 16) & 0xffff));
+ addr++;
/* ori r4,r4,(insn)@l */
- *addr = PPC_INST_ORI | ___PPC_RA(4) | ___PPC_RS(4) |
- (val & 0xffff);
+ patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(4) |
+ ___PPC_RS(4) | (val & 0xffff));
}
/*
@@ -173,24 +174,28 @@ void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr)
{
/* lis r3,(op)@highest */
- *addr++ = PPC_INST_ADDIS | ___PPC_RT(3) |
- ((val >> 48) & 0xffff);
+ patch_instruction(addr, PPC_INST_ADDIS | ___PPC_RT(3) |
+ ((val >> 48) & 0xffff));
+ addr++;
/* ori r3,r3,(op)@higher */
- *addr++ = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) |
- ((val >> 32) & 0xffff);
+ patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) |
+ ___PPC_RS(3) | ((val >> 32) & 0xffff));
+ addr++;
/* rldicr r3,r3,32,31 */
- *addr++ = PPC_INST_RLDICR | ___PPC_RA(3) | ___PPC_RS(3) |
- __PPC_SH64(32) | __PPC_ME64(31);
+ patch_instruction(addr, PPC_INST_RLDICR | ___PPC_RA(3) |
+ ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31));
+ addr++;
/* oris r3,r3,(op)@h */
- *addr++ = PPC_INST_ORIS | ___PPC_RA(3) | ___PPC_RS(3) |
- ((val >> 16) & 0xffff);
+ patch_instruction(addr, PPC_INST_ORIS | ___PPC_RA(3) |
+ ___PPC_RS(3) | ((val >> 16) & 0xffff));
+ addr++;
/* ori r3,r3,(op)@l */
- *addr = PPC_INST_ORI | ___PPC_RA(3) | ___PPC_RS(3) |
- (val & 0xffff);
+ patch_instruction(addr, PPC_INST_ORI | ___PPC_RA(3) |
+ ___PPC_RS(3) | (val & 0xffff));
}
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
@@ -198,7 +203,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
kprobe_opcode_t *buff, branch_op_callback, branch_emulate_step;
kprobe_opcode_t *op_callback_addr, *emulate_step_addr;
long b_offset;
- unsigned long nip;
+ unsigned long nip, size;
+ int rc, i;
kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
@@ -231,8 +237,14 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
goto error;
/* Setup template */
- memcpy(buff, optprobe_template_entry,
- TMPL_END_IDX * sizeof(kprobe_opcode_t));
+ /* We can optimize this via patch_instruction_window later */
+ size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
+ pr_devel("Copying template to %p, size %lu\n", buff, size);
+ for (i = 0; i < size; i++) {
+ rc = patch_instruction(buff + i, *(optprobe_template_entry + i));
+ if (rc < 0)
+ goto error;
+ }
/*
* Fixup the template with instructions to:
@@ -261,8 +273,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
if (!branch_op_callback || !branch_emulate_step)
goto error;
- buff[TMPL_CALL_HDLR_IDX] = branch_op_callback;
- buff[TMPL_EMULATE_IDX] = branch_emulate_step;
+ patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
+ patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step);
/*
* 3. load instruction to be emulated into relevant register, and
@@ -272,8 +284,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 4. branch back from trampoline
*/
- buff[TMPL_RET_IDX] = create_branch((unsigned int *)buff + TMPL_RET_IDX,
- (unsigned long)nip, 0);
+ patch_branch(buff + TMPL_RET_IDX, (unsigned long)nip, 0);
flush_icache_range((unsigned long)buff,
(unsigned long)(&buff[TMPL_END_IDX]));
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index baae104b16c7..9f3e2c932dcc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -511,6 +511,10 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
+ /*
+ * Syscall exit makes a similar initial check before branching
+ * to restore_math. Keep them in synch.
+ */
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -1133,6 +1137,11 @@ static inline void restore_sprs(struct thread_struct *old_thread,
#endif
}
+#ifdef CONFIG_PPC_BOOK3S_64
+#define CP_SIZE 128
+static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE)));
+#endif
+
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
@@ -1195,12 +1204,14 @@ struct task_struct *__switch_to(struct task_struct *prev,
__switch_to_tm(prev, new);
- /*
- * We can't take a PMU exception inside _switch() since there is a
- * window where the kernel stack SLB and the kernel stack are out
- * of sync. Hard disable here.
- */
- hard_irq_disable();
+ if (!radix_enabled()) {
+ /*
+ * We can't take a PMU exception inside _switch() since there
+ * is a window where the kernel stack SLB and the kernel stack
+ * are out of sync. Hard disable here.
+ */
+ hard_irq_disable();
+ }
/*
* Call restore_sprs() before calling _switch(). If we move it after
@@ -1220,8 +1231,28 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch->active = 1;
}
- if (current_thread_info()->task->thread.regs)
+ if (current_thread_info()->task->thread.regs) {
restore_math(current_thread_info()->task->thread.regs);
+
+ /*
+ * The copy-paste buffer can only store into foreign real
+ * addresses, so unprivileged processes can not see the
+ * data or use it in any way unless they have foreign real
+ * mappings. We don't have a VAS driver that allocates those
+ * yet, so no cpabort is required.
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ /*
+ * DD1 allows paste into normal system memory, so we
+ * do an unpaired copy here to clear the buffer and
+ * prevent a covert channel being set up.
+ *
+ * cpabort is not used because it is quite expensive.
+ */
+ asm volatile(PPC_COPY(%0, %1)
+ : : "r"(dummy_copy_buffer), "r"(0));
+ }
+ }
#endif /* CONFIG_PPC_STD_MMU_64 */
return last;
@@ -1666,6 +1697,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
#endif
+ current->thread.load_fp = 0;
memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
current->thread.fp_save_area = NULL;
#ifdef CONFIG_ALTIVEC
@@ -1674,6 +1706,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.vr_save_area = NULL;
current->thread.vrsave = 0;
current->thread.used_vr = 0;
+ current->thread.load_vec = 0;
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_SPE
memset(current->thread.evr, 0, sizeof(current->thread.evr));
@@ -1685,6 +1718,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfhar = 0;
current->thread.tm_texasr = 0;
current->thread.tm_tfiar = 0;
+ current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
}
EXPORT_SYMBOL(start_thread);
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 3650732639ed..0f0b1b2f3b60 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -283,7 +283,7 @@ static void prrn_work_fn(struct work_struct *work)
* the RTAS event.
*/
pseries_devicetree_update(-prrn_update_scope);
- arch_update_cpu_topology();
+ numa_update_cpu_topology(false);
}
static DECLARE_WORK(prrn_work, prrn_work_fn);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 71dcda91755d..94a948207cd2 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -335,6 +335,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
maj = ((pvr >> 8) & 0xFF) - 1;
min = pvr & 0xFF;
break;
+ case 0x004e: /* POWER9 bits 12-15 give chip type */
+ maj = (pvr >> 8) & 0x0F;
+ min = pvr & 0xFF;
+ break;
default:
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
@@ -928,7 +932,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC_MM_SLICES
#ifdef CONFIG_PPC64
- init_mm.context.addr_limit = TASK_SIZE_128TB;
+ init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
#else
#error "context.addr_limit not initialized."
#endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index f35ff9dea4fb..4640f6d64f8b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -616,6 +616,24 @@ void __init exc_lvl_early_init(void)
#endif
/*
+ * Emergency stacks are used for a range of things, from asynchronous
+ * NMIs (system reset, machine check) to synchronous, process context.
+ * We set preempt_count to zero, even though that isn't necessarily correct. To
+ * get the right value we'd need to copy it from the previous thread_info, but
+ * doing that might fault causing more problems.
+ * TODO: what to do with accounting?
+ */
+static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
+{
+ ti->task = NULL;
+ ti->cpu = cpu;
+ ti->preempt_count = 0;
+ ti->local_flags = 0;
+ ti->flags = 0;
+ klp_init_thread_info(ti);
+}
+
+/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
@@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
* Since we use these as temporary stacks during secondary CPU
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
+ *
+ * The IRQ stacks allocated elsewhere in this file are zeroed and
+ * initialized in kernel/irq.c. These are initialized here in order
+ * to have emergency stacks available as early as possible.
*/
limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
- klp_init_thread_info(ti);
+ memset(ti, 0, THREAD_SIZE);
+ emerg_stack_init_thread_info(ti, i);
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
@@ -661,7 +686,7 @@ void __init emergency_stack_init(void)
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
- return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
+ return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
__pa(MAX_DMA_ADDRESS));
}
@@ -672,7 +697,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
- if (cpu_to_node(from) == cpu_to_node(to))
+ if (early_cpu_to_node(from) == early_cpu_to_node(to))
return LOCAL_DISTANCE;
else
return REMOTE_DISTANCE;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index df2a41647d8e..c6b8bace1766 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -33,6 +33,7 @@
#include <linux/notifier.h>
#include <linux/topology.h>
#include <linux/profile.h>
+#include <linux/processor.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
@@ -97,7 +98,7 @@ int smp_generic_cpu_bootable(unsigned int nr)
/* Special case - we inhibit secondary thread startup
* during boot if the user requests it.
*/
- if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
+ if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
return 0;
if (smt_enabled_at_boot
@@ -112,7 +113,8 @@ int smp_generic_cpu_bootable(unsigned int nr)
#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
/*
* The processor is currently spinning, waiting for the
@@ -766,8 +768,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
smp_ops->give_timebase();
/* Wait until cpu puts itself in the online & active maps */
- while (!cpu_online(cpu))
- cpu_relax();
+ spin_until_cond(cpu_online(cpu));
return 0;
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 2b33cfaac7b8..fe6f3a285455 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -59,10 +59,10 @@
#include <linux/suspend.h>
#include <linux/rtc.h>
#include <linux/sched/cputime.h>
+#include <linux/processor.h>
#include <asm/trace.h>
#include <asm/io.h>
-#include <asm/processor.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
@@ -442,6 +442,7 @@ void __delay(unsigned long loops)
unsigned long start;
int diff;
+ spin_begin();
if (__USE_RTC()) {
start = get_rtcl();
do {
@@ -449,13 +450,14 @@ void __delay(unsigned long loops)
diff = get_rtcl() - start;
if (diff < 0)
diff += 1000000000;
+ spin_cpu_relax();
} while (diff < loops);
} else {
start = get_tbl();
while (get_tbl() - start < loops)
- HMT_low();
- HMT_medium();
+ spin_cpu_relax();
}
+ spin_end();
}
EXPORT_SYMBOL(__delay);
@@ -675,7 +677,7 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
* the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
* are 64-bit unsigned numbers.
*/
-unsigned long long sched_clock(void)
+notrace unsigned long long sched_clock(void)
{
if (__USE_RTC())
return get_rtc();
@@ -739,12 +741,20 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
static void start_cpu_decrementer(void)
{
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ unsigned int tcr;
+
/* Clear any pending timer interrupts */
mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
- /* Enable decrementer interrupt */
- mtspr(SPRN_TCR, TCR_DIE);
-#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
+ tcr = mfspr(SPRN_TCR);
+ /*
+ * The watchdog may have already been enabled by u-boot. So leave
+ * TRC[WP] (Watchdog Period) alone.
+ */
+ tcr &= TCR_WP_MASK; /* Clear all bits except for TCR[WP] */
+ tcr |= TCR_DIE; /* Enable decrementer */
+ mtspr(SPRN_TCR, tcr);
+#endif
}
void __init generic_calibrate_decr(void)
@@ -823,38 +833,76 @@ void read_persistent_clock(struct timespec *ts)
}
/* clocksource code */
-static u64 rtc_read(struct clocksource *cs)
+static notrace u64 rtc_read(struct clocksource *cs)
{
return (u64)get_rtc();
}
-static u64 timebase_read(struct clocksource *cs)
+static notrace u64 timebase_read(struct clocksource *cs)
{
return (u64)get_tb();
}
-void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult, u64 cycle_last)
+
+void update_vsyscall(struct timekeeper *tk)
{
+ struct timespec xt;
+ struct clocksource *clock = tk->tkr_mono.clock;
+ u32 mult = tk->tkr_mono.mult;
+ u32 shift = tk->tkr_mono.shift;
+ u64 cycle_last = tk->tkr_mono.cycle_last;
u64 new_tb_to_xs, new_stamp_xsec;
- u32 frac_sec;
+ u64 frac_sec;
if (clock != &clocksource_timebase)
return;
+ xt.tv_sec = tk->xtime_sec;
+ xt.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_mb();
- /* 19342813113834067 ~= 2^(20+64) / 1e9 */
- new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
- new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
- do_div(new_stamp_xsec, 1000000000);
- new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
+ /*
+ * This computes ((2^20 / 1e9) * mult) >> shift as a
+ * 0.64 fixed-point fraction.
+ * The computation in the else clause below won't overflow
+ * (as long as the timebase frequency is >= 1.049 MHz)
+ * but loses precision because we lose the low bits of the constant
+ * in the shift. Note that 19342813113834067 ~= 2^(20+64) / 1e9.
+ * For a shift of 24 the error is about 0.5e-9, or about 0.5ns
+ * over a second. (Shift values are usually 22, 23 or 24.)
+ * For high frequency clocks such as the 512MHz timebase clock
+ * on POWER[6789], the mult value is small (e.g. 32768000)
+ * and so we can shift the constant by 16 initially
+ * (295147905179 ~= 2^(20+64-16) / 1e9) and then do the
+ * remaining shifts after the multiplication, which gives a
+ * more accurate result (e.g. with mult = 32768000, shift = 24,
+ * the error is only about 1.2e-12, or 0.7ns over 10 minutes).
+ */
+ if (mult <= 62500000 && clock->shift >= 16)
+ new_tb_to_xs = ((u64) mult * 295147905179ULL) >> (clock->shift - 16);
+ else
+ new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
+
+ /*
+ * Compute the fractional second in units of 2^-32 seconds.
+ * The fractional second is tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift
+ * in nanoseconds, so multiplying that by 2^32 / 1e9 gives
+ * it in units of 2^-32 seconds.
+ * We assume shift <= 32 because clocks_calc_mult_shift()
+ * generates shift values in the range 0 - 32.
+ */
+ frac_sec = tk->tkr_mono.xtime_nsec << (32 - shift);
+ do_div(frac_sec, NSEC_PER_SEC);
- BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
- /* this is tv_nsec / 1e9 as a 0.32 fraction */
- frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;
+ /*
+ * Work out new stamp_xsec value for any legacy users of systemcfg.
+ * stamp_xsec is in units of 2^-20 seconds.
+ */
+ new_stamp_xsec = frac_sec >> 12;
+ new_stamp_xsec += tk->xtime_sec * XSEC_PER_SEC;
/*
* tb_update_count is used to allow the userspace gettimeofday code
@@ -864,15 +912,13 @@ void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
* the two values of tb_update_count match and are even then the
* tb_to_xs and stamp_xsec values are consistent. If not, then it
* loops back and reads them again until this criteria is met.
- * We expect the caller to have done the first increment of
- * vdso_data->tb_update_count already.
*/
vdso_data->tb_orig_stamp = cycle_last;
vdso_data->stamp_xsec = new_stamp_xsec;
vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = wtm->tv_sec;
- vdso_data->wtom_clock_nsec = wtm->tv_nsec;
- vdso_data->stamp_xtime = *wall_time;
+ vdso_data->wtom_clock_sec = tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec;
+ vdso_data->stamp_xtime = xt;
vdso_data->stamp_sec_fraction = frac_sec;
smp_wmb();
++(vdso_data->tb_update_count);
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 3a2d04134da9..c4ba37822ba0 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -313,8 +313,8 @@ dont_backup_fp:
blr
- /* void tm_recheckpoint(struct thread_struct *thread,
- * unsigned long orig_msr)
+ /* void __tm_recheckpoint(struct thread_struct *thread,
+ * unsigned long orig_msr)
* - Restore the checkpointed register state saved by tm_reclaim
* when we switch_to a process.
*
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 7c933a99f5d5..c98e90b4ea7b 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPR(0, r1)
+ SAVE_10GPRS(2, r1)
+ SAVE_10GPRS(12, r1)
+ SAVE_10GPRS(22, r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE
+ std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
@@ -95,18 +99,44 @@ ftrace_call:
bl ftrace_stub
nop
- /* Load ctr with the possibly modified NIP */
- ld r3, _NIP(r1)
- mtctr r3
+ /* Load the possibly modified NIP */
+ ld r15, _NIP(r1)
+
#ifdef CONFIG_LIVEPATCH
- cmpd r14,r3 /* has NIP been altered? */
+ cmpd r14, r15 /* has NIP been altered? */
+#endif
+
+#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
+ /* NIP has not been altered, skip over further checks */
+ beq 1f
+
+ /* Check if there is an active kprobe on us */
+ subi r3, r14, 4
+ bl is_current_kprobe_addr
+ nop
+
+ /*
+ * If r3 == 1, then this is a kprobe/jprobe.
+ * else, this is livepatched function.
+ *
+ * The conditional branch for livepatch_handler below will use the
+ * result of this comparison. For kprobe/jprobe, we just need to branch to
+ * the new NIP, not call livepatch_handler. The branch below is bne, so we
+ * want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
+ * CR0[EQ] = (r3 == 1).
+ */
+ cmpdi r3, 1
+1:
#endif
+ /* Load CTR with the possibly modified NIP */
+ mtctr r15
+
/* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
+ REST_GPR(0,r1)
+ REST_10GPRS(2,r1)
+ REST_10GPRS(12,r1)
+ REST_10GPRS(22,r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@@ -119,7 +149,10 @@ ftrace_call:
addi r1, r1, SWITCH_FRAME_SIZE
#ifdef CONFIG_LIVEPATCH
- /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ /*
+ * Based on the cmpd or cmpdi above, if the NIP was altered and we're
+ * not on a kprobe/jprobe, then handle livepatch.
+ */
bne- livepatch_handler
#endif
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d4e545d27ef9..bfcfd9ef09f2 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -237,6 +237,7 @@ void die(const char *str, struct pt_regs *regs, long err)
err = 0;
oops_end(flags, regs, err);
}
+NOKPROBE_SYMBOL(die);
void user_single_step_siginfo(struct task_struct *tsk,
struct pt_regs *regs, siginfo_t *info)
@@ -1968,6 +1969,7 @@ void unrecoverable_exception(struct pt_regs *regs)
regs->trap, regs->nip);
die("Unrecoverable exception", regs, SIGABRT);
}
+NOKPROBE_SYMBOL(unrecoverable_exception);
#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
/*
@@ -1998,6 +2000,7 @@ void kernel_bad_stack(struct pt_regs *regs)
regs->gpr[1], regs->nip);
die("Bad kernel stack pointer", regs, SIGABRT);
}
+NOKPROBE_SYMBOL(kernel_bad_stack);
void __init trap_init(void)
{
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 2f793be3d2b1..b1a250560198 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -8,6 +8,12 @@
#include <asm/cache.h>
#include <asm/thread_info.h>
+#ifdef CONFIG_STRICT_KERNEL_RWX
+#define STRICT_ALIGN_SIZE (1 << 24)
+#else
+#define STRICT_ALIGN_SIZE PAGE_SIZE
+#endif
+
ENTRY(_stext)
PHDRS {
@@ -58,7 +64,6 @@ SECTIONS
#ifdef CONFIG_PPC64
KEEP(*(.head.text.first_256B));
#ifdef CONFIG_PPC_BOOK3E
-# define END_FIXED 0x100
#else
KEEP(*(.head.text.real_vectors));
*(.head.text.real_trampolines);
@@ -66,12 +71,8 @@ SECTIONS
*(.head.text.virt_trampolines);
# if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
KEEP(*(.head.data.fwnmi_page));
-# define END_FIXED 0x8000
-# else
-# define END_FIXED 0x7000
# endif
#endif
- ASSERT((. == END_FIXED), "vmlinux.lds.S: fixed section overflow error");
#else /* !CONFIG_PPC64 */
HEAD_TEXT
#endif
@@ -79,23 +80,6 @@ SECTIONS
__head_end = .;
- /*
- * If the build dies here, it's likely code in head_64.S is referencing
- * labels it can't reach, and the linker inserting stubs without the
- * assembler's knowledge. To debug, remove the above assert and
- * rebuild. Look for branch stubs in the fixed section region.
- *
- * Linker stub generation could be allowed in "trampoline"
- * sections if absolutely necessary, but this would require
- * some rework of the fixed sections. Before resorting to this,
- * consider references that have sufficient addressing range,
- * (e.g., hand coded trampolines) so the linker does not have
- * to add stubs.
- *
- * Linker stubs at the top of the main text section are currently not
- * detected, and will result in a crash at boot due to offsets being
- * wrong.
- */
#ifdef CONFIG_PPC64
/*
* BLOCK(0) overrides the default output section alignment because
@@ -103,18 +87,31 @@ SECTIONS
* section placement to work.
*/
.text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+ *(.linker_stub_catch);
+ . = . ;
+#endif
+
#else
.text : AT(ADDR(.text) - LOAD_OFFSET) {
ALIGN_FUNCTION();
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
- *(.text .fixup __ftr_alt_* .ref.text)
+ *(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
+ /*
+ * -Os builds call FP save/restore functions. The powerpc64
+ * linker generates those on demand in the .sfpr section.
+ * .sfpr gets placed at the beginning of a group of input
+ * sections, which can break start-of-text offset if it is
+ * included with the main text sections, so put it by itself.
+ */
+ *(.sfpr);
MEM_KEEP(init.text)
MEM_KEEP(exit.text)
@@ -132,7 +129,7 @@ SECTIONS
PROVIDE32 (etext = .);
/* Read-only data */
- RODATA
+ RO_DATA(PAGE_SIZE)
EXCEPTION_TABLE(0)
@@ -149,7 +146,7 @@ SECTIONS
/*
* Init sections discarded at runtime
*/
- . = ALIGN(PAGE_SIZE);
+ . = ALIGN(STRICT_ALIGN_SIZE);
__init_begin = .;
INIT_TEXT_SECTION(PAGE_SIZE) :kernel
@@ -267,7 +264,9 @@ SECTIONS
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
*(.sdata)
+ *(.sdata2)
*(.got.plt) *(.got)
+ *(.plt)
}
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -330,6 +329,16 @@ SECTIONS
_end = . ;
PROVIDE32 (end = .);
- /* Sections to be discarded. */
+ STABS_DEBUG
+
+ DWARF_DEBUG
+
DISCARDS
+ /DISCARD/ : {
+ *(*.EMB.apuinfo)
+ *(.glink .iplt .plt .rela* .comment)
+ *(.gnu.version*)
+ *(.gnu.attributes)
+ *(.eh_frame)
+ }
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 42b7a4fd57d9..0b436df746fc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -46,6 +46,8 @@
#include <linux/of.h>
#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
@@ -645,6 +647,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
unsigned long stolen;
unsigned long core_stolen;
u64 now;
+ unsigned long flags;
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
@@ -652,10 +655,10 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
- spin_lock_irq(&vcpu->arch.tbacct_lock);
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
stolen += vcpu->arch.busy_stolen;
vcpu->arch.busy_stolen = 0;
- spin_unlock_irq(&vcpu->arch.tbacct_lock);
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
@@ -675,6 +678,26 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
}
+/* See if there is a doorbell interrupt pending for a vcpu */
+static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+{
+ int thr;
+ struct kvmppc_vcore *vc;
+
+ if (vcpu->arch.doorbell_request)
+ return true;
+ /*
+ * Ensure that the read of vcore->dpdes comes after the read
+ * of vcpu->doorbell_request. This barrier matches the
+ * lwsync in book3s_hv_rmhandlers.S just before the
+ * fast_guest_return label.
+ */
+ smp_rmb();
+ vc = vcpu->arch.vcore;
+ thr = vcpu->vcpu_id - vc->first_vcpuid;
+ return !!(vc->dpdes & (1 << thr));
+}
+
static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
@@ -926,6 +949,101 @@ static int kvmppc_emulate_debug_inst(struct kvm_run *run,
}
}
+static void do_nothing(void *x)
+{
+}
+
+static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+{
+ int thr, cpu, pcpu, nthreads;
+ struct kvm_vcpu *v;
+ unsigned long dpdes;
+
+ nthreads = vcpu->kvm->arch.emul_smt_mode;
+ dpdes = 0;
+ cpu = vcpu->vcpu_id & ~(nthreads - 1);
+ for (thr = 0; thr < nthreads; ++thr, ++cpu) {
+ v = kvmppc_find_vcpu(vcpu->kvm, cpu);
+ if (!v)
+ continue;
+ /*
+ * If the vcpu is currently running on a physical cpu thread,
+ * interrupt it in order to pull it out of the guest briefly,
+ * which will update its vcore->dpdes value.
+ */
+ pcpu = READ_ONCE(v->cpu);
+ if (pcpu >= 0)
+ smp_call_function_single(pcpu, do_nothing, NULL, 1);
+ if (kvmppc_doorbell_pending(v))
+ dpdes |= 1 << thr;
+ }
+ return dpdes;
+}
+
+/*
+ * On POWER9, emulate doorbell-related instructions in order to
+ * give the guest the illusion of running on a multi-threaded core.
+ * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+ * and mfspr DPDES.
+ */
+static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+{
+ u32 inst, rb, thr;
+ unsigned long arg;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tvcpu;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return EMULATE_FAIL;
+ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
+ return RESUME_GUEST;
+ if (get_op(inst) != 31)
+ return EMULATE_FAIL;
+ rb = get_rb(inst);
+ thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+ switch (get_xop(inst)) {
+ case OP_31_XOP_MSGSNDP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+ break;
+ arg &= 0x3f;
+ if (arg >= kvm->arch.emul_smt_mode)
+ break;
+ tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
+ if (!tvcpu)
+ break;
+ if (!tvcpu->arch.doorbell_request) {
+ tvcpu->arch.doorbell_request = 1;
+ kvmppc_fast_vcpu_kick_hv(tvcpu);
+ }
+ break;
+ case OP_31_XOP_MSGCLRP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
+ break;
+ vcpu->arch.vcore->dpdes = 0;
+ vcpu->arch.doorbell_request = 0;
+ break;
+ case OP_31_XOP_MFSPR:
+ switch (get_sprn(inst)) {
+ case SPRN_TIR:
+ arg = thr;
+ break;
+ case SPRN_DPDES:
+ arg = kvmppc_read_dpdes(vcpu);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_gpr(vcpu, get_rt(inst), arg);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+ return RESUME_GUEST;
+}
+
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
@@ -971,15 +1089,20 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
- /*
- * Deliver a machine check interrupt to the guest.
- * We have to do this, even if the host has handled the
- * machine check, because machine checks use SRR0/1 and
- * the interrupt might have trashed guest state in them.
- */
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_MACHINE_CHECK);
- r = RESUME_GUEST;
+ /* Exit to guest with KVM_EXIT_NMI as exit reason */
+ run->exit_reason = KVM_EXIT_NMI;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ /* Clear out the old NMI status from run->flags */
+ run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+ /* Now set the NMI status */
+ if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+ run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+ else
+ run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+ r = RESUME_HOST;
+ /* Print the MCE event to host console. */
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false);
break;
case BOOK3S_INTERRUPT_PROGRAM:
{
@@ -1048,12 +1171,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
/*
* This occurs if the guest (kernel or userspace), does something that
- * is prohibited by HFSCR. We just generate a program interrupt to
- * the guest.
+ * is prohibited by HFSCR.
+ * On POWER9, this could be a doorbell instruction that we need
+ * to emulate.
+ * Otherwise, we just generate a program interrupt to the guest.
*/
case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- r = RESUME_GUEST;
+ r = EMULATE_FAIL;
+ if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
+ r = kvmppc_emulate_doorbell_instr(vcpu);
+ if (r == EMULATE_FAIL) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ r = RESUME_GUEST;
+ }
break;
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
@@ -1143,6 +1273,12 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
mask |= LPCR_AIL;
+ /*
+ * On POWER9, allow userspace to enable large decrementer for the
+ * guest, whether or not the host has it enabled.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ mask |= LPCR_LD;
/* Broken 32-bit version of LPCR must not clear top bits */
if (preserve_top32)
@@ -1486,6 +1622,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ /*
+ * POWER9 DD1 has an erratum where writing TBU40 causes
+ * the timebase to lose ticks. So we don't let the
+ * timebase offset be changed on P9 DD1. (It is
+ * initialized to zero.)
+ */
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
@@ -1603,7 +1747,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * threads_per_vcore();
+ vcore->first_vcpuid = core * kvm->arch.smt_mode;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -1762,14 +1906,10 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
- int err = -EINVAL;
+ int err;
int core;
struct kvmppc_vcore *vcore;
- core = id / threads_per_vcore();
- if (core >= KVM_MAX_VCORES)
- goto out;
-
err = -ENOMEM;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
@@ -1800,6 +1940,20 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu->arch.busy_preempt = TB_NIL;
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
+ /*
+ * Set the default HFSCR for the guest from the host value.
+ * This value is only used on POWER9.
+ * On POWER9 DD1, TM doesn't work, so we make sure to
+ * prevent the guest from using it.
+ * On POWER9, we want to virtualize the doorbell facility, so we
+ * turn off the HFSCR bit, which causes those instructions to trap.
+ */
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+ if (!cpu_has_feature(CPU_FTR_TM))
+ vcpu->arch.hfscr &= ~HFSCR_TM;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ vcpu->arch.hfscr &= ~HFSCR_MSGP;
+
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -1807,11 +1961,17 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
init_waitqueue_head(&vcpu->arch.cpu_run);
mutex_lock(&kvm->lock);
- vcore = kvm->arch.vcores[core];
- if (!vcore) {
- vcore = kvmppc_vcore_create(kvm, core);
- kvm->arch.vcores[core] = vcore;
- kvm->arch.online_vcores++;
+ vcore = NULL;
+ err = -EINVAL;
+ core = id / kvm->arch.smt_mode;
+ if (core < KVM_MAX_VCORES) {
+ vcore = kvm->arch.vcores[core];
+ if (!vcore) {
+ err = -ENOMEM;
+ vcore = kvmppc_vcore_create(kvm, core);
+ kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
+ }
}
mutex_unlock(&kvm->lock);
@@ -1839,6 +1999,43 @@ out:
return ERR_PTR(err);
}
+static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+ unsigned long flags)
+{
+ int err;
+ int esmt = 0;
+
+ if (flags)
+ return -EINVAL;
+ if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+ return -EINVAL;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * On POWER8 (or POWER7), the threading mode is "strict",
+ * so we pack smt_mode vcpus per vcore.
+ */
+ if (smt_mode > threads_per_subcore)
+ return -EINVAL;
+ } else {
+ /*
+ * On POWER9, the threading mode is "loose",
+ * so each vcpu gets its own vcore.
+ */
+ esmt = smt_mode;
+ smt_mode = 1;
+ }
+ mutex_lock(&kvm->lock);
+ err = -EBUSY;
+ if (!kvm->arch.online_vcores) {
+ kvm->arch.smt_mode = smt_mode;
+ kvm->arch.emul_smt_mode = esmt;
+ err = 0;
+ }
+ mutex_unlock(&kvm->lock);
+
+ return err;
+}
+
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
{
if (vpa->pinned_addr)
@@ -1889,7 +2086,7 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
}
}
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
struct kvm_vcpu *vcpu)
@@ -1954,10 +2151,6 @@ static void kvmppc_release_hwthread(int cpu)
tpaca->kvm_hstate.kvm_split_mode = NULL;
}
-static void do_nothing(void *x)
-{
-}
-
static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
{
int i;
@@ -1975,11 +2168,35 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
}
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ /*
+ * With radix, the guest can do TLB invalidations itself,
+ * and it could choose to use the local form (tlbiel) if
+ * it is invalidating a translation that has only ever been
+ * used on one vcpu. However, that doesn't mean it has
+ * only ever been used on one physical cpu, since vcpus
+ * can move around between pcpus. To cope with this, when
+ * a vcpu moves from one pcpu to another, we need to tell
+ * any vcpus running on the same core as this vcpu previously
+ * ran to flush the TLB. The TLB is shared between threads,
+ * so we use a single bit in .need_tlb_flush for all 4 threads.
+ */
+ if (vcpu->arch.prev_cpu != pcpu) {
+ if (vcpu->arch.prev_cpu >= 0 &&
+ cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
+ cpu_first_thread_sibling(pcpu))
+ radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
+ vcpu->arch.prev_cpu = pcpu;
+ }
+}
+
static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
- struct kvmppc_vcore *mvc = vc->master_vcore;
struct kvm *kvm = vc->kvm;
cpu = vc->pcpu;
@@ -1989,36 +2206,16 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
vcpu->arch.timer_running = 0;
}
cpu += vcpu->arch.ptid;
- vcpu->cpu = mvc->pcpu;
+ vcpu->cpu = vc->pcpu;
vcpu->arch.thread_cpu = cpu;
-
- /*
- * With radix, the guest can do TLB invalidations itself,
- * and it could choose to use the local form (tlbiel) if
- * it is invalidating a translation that has only ever been
- * used on one vcpu. However, that doesn't mean it has
- * only ever been used on one physical cpu, since vcpus
- * can move around between pcpus. To cope with this, when
- * a vcpu moves from one pcpu to another, we need to tell
- * any vcpus running on the same core as this vcpu previously
- * ran to flush the TLB. The TLB is shared between threads,
- * so we use a single bit in .need_tlb_flush for all 4 threads.
- */
- if (kvm_is_radix(kvm) && vcpu->arch.prev_cpu != cpu) {
- if (vcpu->arch.prev_cpu >= 0 &&
- cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
- cpu_first_thread_sibling(cpu))
- radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
- vcpu->arch.prev_cpu = cpu;
- }
cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
}
tpaca = &paca[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
- tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+ tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
/* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
smp_wmb();
- tpaca->kvm_hstate.kvm_vcore = mvc;
+ tpaca->kvm_hstate.kvm_vcore = vc;
if (cpu != smp_processor_id())
kvmppc_ipi_thread(cpu);
}
@@ -2147,8 +2344,7 @@ struct core_info {
int max_subcore_threads;
int total_threads;
int subcore_threads[MAX_SUBCORES];
- struct kvm *subcore_vm[MAX_SUBCORES];
- struct list_head vcs[MAX_SUBCORES];
+ struct kvmppc_vcore *vc[MAX_SUBCORES];
};
/*
@@ -2159,17 +2355,12 @@ static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
{
- int sub;
-
memset(cip, 0, sizeof(*cip));
cip->n_subcores = 1;
cip->max_subcore_threads = vc->num_threads;
cip->total_threads = vc->num_threads;
cip->subcore_threads[0] = vc->num_threads;
- cip->subcore_vm[0] = vc->kvm;
- for (sub = 0; sub < MAX_SUBCORES; ++sub)
- INIT_LIST_HEAD(&cip->vcs[sub]);
- list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+ cip->vc[0] = vc;
}
static bool subcore_config_ok(int n_subcores, int n_threads)
@@ -2189,9 +2380,8 @@ static bool subcore_config_ok(int n_subcores, int n_threads)
return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
}
-static void init_master_vcore(struct kvmppc_vcore *vc)
+static void init_vcore_to_run(struct kvmppc_vcore *vc)
{
- vc->master_vcore = vc;
vc->entry_exit_map = 0;
vc->in_guest = 0;
vc->napping_threads = 0;
@@ -2216,9 +2406,9 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
++cip->n_subcores;
cip->total_threads += vc->num_threads;
cip->subcore_threads[sub] = vc->num_threads;
- cip->subcore_vm[sub] = vc->kvm;
- init_master_vcore(vc);
- list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
+ cip->vc[sub] = vc;
+ init_vcore_to_run(vc);
+ list_del_init(&vc->preempt_list);
return true;
}
@@ -2286,6 +2476,18 @@ static void collect_piggybacks(struct core_info *cip, int target_threads)
spin_unlock(&lp->lock);
}
+static bool recheck_signals(struct core_info *cip)
+{
+ int sub, i;
+ struct kvm_vcpu *vcpu;
+
+ for (sub = 0; sub < cip->n_subcores; ++sub)
+ for_each_runnable_thread(i, vcpu, cip->vc[sub])
+ if (signal_pending(vcpu->arch.run_task))
+ return true;
+ return false;
+}
+
static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
int still_running = 0, i;
@@ -2323,7 +2525,6 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
wake_up(&vcpu->arch.cpu_run);
}
}
- list_del_init(&vc->preempt_list);
if (!is_master) {
if (still_running > 0) {
kvmppc_vcore_preempt(vc);
@@ -2385,6 +2586,21 @@ static inline int kvmppc_set_host_core(unsigned int cpu)
return 0;
}
+static void set_irq_happened(int trap)
+{
+ switch (trap) {
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ local_paca->irq_happened |= PACA_IRQ_EE;
+ break;
+ case BOOK3S_INTERRUPT_H_DOORBELL:
+ local_paca->irq_happened |= PACA_IRQ_DBELL;
+ break;
+ case BOOK3S_INTERRUPT_HMI:
+ local_paca->irq_happened |= PACA_IRQ_HMI;
+ break;
+ }
+}
+
/*
* Run a set of guest threads on a physical core.
* Called with vc->lock held.
@@ -2395,7 +2611,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int i;
int srcu_idx;
struct core_info core_info;
- struct kvmppc_vcore *pvc, *vcnext;
+ struct kvmppc_vcore *pvc;
struct kvm_split_mode split_info, *sip;
int split, subcore_size, active;
int sub;
@@ -2404,6 +2620,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int pcpu, thr;
int target_threads;
int controlled_threads;
+ int trap;
/*
* Remove from the list any threads that have a signal pending
@@ -2418,7 +2635,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/*
* Initialize *vc.
*/
- init_master_vcore(vc);
+ init_vcore_to_run(vc);
vc->preempt_tb = TB_NIL;
/*
@@ -2455,6 +2672,43 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
if (vc->num_threads < target_threads)
collect_piggybacks(&core_info, target_threads);
+ /*
+ * On radix, arrange for TLB flushing if necessary.
+ * This has to be done before disabling interrupts since
+ * it uses smp_call_function().
+ */
+ pcpu = smp_processor_id();
+ if (kvm_is_radix(vc->kvm)) {
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ for_each_runnable_thread(i, vcpu, core_info.vc[sub])
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ }
+
+ /*
+ * Hard-disable interrupts, and check resched flag and signals.
+ * If we need to reschedule or deliver a signal, clean up
+ * and return without going into the guest(s).
+ */
+ local_irq_disable();
+ hard_irq_disable();
+ if (lazy_irq_pending() || need_resched() ||
+ recheck_signals(&core_info)) {
+ local_irq_enable();
+ vc->vcore_state = VCORE_INACTIVE;
+ /* Unlock all except the primary vcore */
+ for (sub = 1; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ /* Put back on to the preempted vcores list */
+ kvmppc_vcore_preempt(pvc);
+ spin_unlock(&pvc->lock);
+ }
+ for (i = 0; i < controlled_threads; ++i)
+ kvmppc_release_hwthread(pcpu + i);
+ return;
+ }
+
+ kvmppc_clear_host_core(pcpu);
+
/* Decide on micro-threading (split-core) mode */
subcore_size = threads_per_subcore;
cmd_bit = stat_bit = 0;
@@ -2478,13 +2732,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.ldbar = mfspr(SPRN_LDBAR);
split_info.subcore_size = subcore_size;
for (sub = 0; sub < core_info.n_subcores; ++sub)
- split_info.master_vcs[sub] =
- list_first_entry(&core_info.vcs[sub],
- struct kvmppc_vcore, preempt_list);
+ split_info.vc[sub] = core_info.vc[sub];
/* order writes to split_info before kvm_split_mode pointer */
smp_wmb();
}
- pcpu = smp_processor_id();
for (thr = 0; thr < controlled_threads; ++thr)
paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
@@ -2504,32 +2755,29 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
}
- kvmppc_clear_host_core(pcpu);
-
/* Start all the threads */
active = 0;
for (sub = 0; sub < core_info.n_subcores; ++sub) {
thr = subcore_thread_map[sub];
thr0_done = false;
active |= 1 << thr;
- list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
- pvc->pcpu = pcpu + thr;
- for_each_runnable_thread(i, vcpu, pvc) {
- kvmppc_start_thread(vcpu, pvc);
- kvmppc_create_dtl_entry(vcpu, pvc);
- trace_kvm_guest_enter(vcpu);
- if (!vcpu->arch.ptid)
- thr0_done = true;
- active |= 1 << (thr + vcpu->arch.ptid);
- }
- /*
- * We need to start the first thread of each subcore
- * even if it doesn't have a vcpu.
- */
- if (pvc->master_vcore == pvc && !thr0_done)
- kvmppc_start_thread(NULL, pvc);
- thr += pvc->num_threads;
+ pvc = core_info.vc[sub];
+ pvc->pcpu = pcpu + thr;
+ for_each_runnable_thread(i, vcpu, pvc) {
+ kvmppc_start_thread(vcpu, pvc);
+ kvmppc_create_dtl_entry(vcpu, pvc);
+ trace_kvm_guest_enter(vcpu);
+ if (!vcpu->arch.ptid)
+ thr0_done = true;
+ active |= 1 << (thr + vcpu->arch.ptid);
}
+ /*
+ * We need to start the first thread of each subcore
+ * even if it doesn't have a vcpu.
+ */
+ if (!thr0_done)
+ kvmppc_start_thread(NULL, pvc);
+ thr += pvc->num_threads;
}
/*
@@ -2556,17 +2804,27 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 0);
for (sub = 0; sub < core_info.n_subcores; ++sub)
- list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
- spin_unlock(&pvc->lock);
+ spin_unlock(&core_info.vc[sub]->lock);
+
+ /*
+ * Interrupts will be enabled once we get into the guest,
+ * so tell lockdep that we're about to enable interrupts.
+ */
+ trace_hardirqs_on();
guest_enter();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
- __kvmppc_vcore_entry();
+ trap = __kvmppc_vcore_entry();
srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+ guest_exit();
+
+ trace_hardirqs_off();
+ set_irq_happened(trap);
+
spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
@@ -2594,6 +2852,10 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
split_info.do_nap = 0;
}
+ kvmppc_set_host_core(pcpu);
+
+ local_irq_enable();
+
/* Let secondaries go back to the offline loop */
for (i = 0; i < controlled_threads; ++i) {
kvmppc_release_hwthread(pcpu + i);
@@ -2602,18 +2864,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
}
- kvmppc_set_host_core(pcpu);
-
spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
- guest_exit();
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
- preempt_list)
- post_guest_process(pvc, pvc == vc);
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ post_guest_process(pvc, pvc == vc);
+ }
spin_lock(&vc->lock);
preempt_enable();
@@ -2658,6 +2917,30 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns /= halt_poll_ns_shrink;
}
+#ifdef CONFIG_KVM_XICS
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ if (!xive_enabled())
+ return false;
+ return vcpu->arch.xive_saved_state.pipr <
+ vcpu->arch.xive_saved_state.cppr;
+}
+#else
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+#endif /* CONFIG_KVM_XICS */
+
+static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
+ kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
+ return true;
+
+ return false;
+}
+
/*
* Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
@@ -2668,8 +2951,7 @@ static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
int i;
for_each_runnable_thread(i, vcpu, vc) {
- if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded ||
- vcpu->arch.prodded)
+ if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
return 1;
}
@@ -2811,15 +3093,14 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
*/
if (!signal_pending(current)) {
if (vc->vcore_state == VCORE_PIGGYBACK) {
- struct kvmppc_vcore *mvc = vc->master_vcore;
- if (spin_trylock(&mvc->lock)) {
- if (mvc->vcore_state == VCORE_RUNNING &&
- !VCORE_IS_EXITING(mvc)) {
+ if (spin_trylock(&vc->lock)) {
+ if (vc->vcore_state == VCORE_RUNNING &&
+ !VCORE_IS_EXITING(vc)) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
}
- spin_unlock(&mvc->lock);
+ spin_unlock(&vc->lock);
}
} else if (vc->vcore_state == VCORE_RUNNING &&
!VCORE_IS_EXITING(vc)) {
@@ -2855,7 +3136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
break;
n_ceded = 0;
for_each_runnable_thread(i, v, vc) {
- if (!v->arch.pending_exceptions && !v->arch.prodded)
+ if (!kvmppc_vcpu_woken(v))
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
@@ -2907,12 +3188,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
+ unsigned long ebb_regs[3] = {}; /* shut up GCC */
+ unsigned long user_tar = 0;
+ unsigned int user_vrsave;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
+ /*
+ * Don't allow entry with a suspended transaction, because
+ * the guest entry/exit code will lose it.
+ * If the guest has TM enabled, save away their TM-related SPRs
+ * (they will get restored by the TM unavailable interrupt).
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ (current->thread.regs->msr & MSR_TM)) {
+ if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ return -EINVAL;
+ }
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
@@ -2934,6 +3239,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
flush_all_to_thread(current);
+ /* Save userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ ebb_regs[0] = mfspr(SPRN_EBBHR);
+ ebb_regs[1] = mfspr(SPRN_EBBRR);
+ ebb_regs[2] = mfspr(SPRN_BESCR);
+ user_tar = mfspr(SPRN_TAR);
+ }
+ user_vrsave = mfspr(SPRN_VRSAVE);
+
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@@ -2960,6 +3274,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
+ /* Restore userspace EBB and other register values */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_EBBHR, ebb_regs[0]);
+ mtspr(SPRN_EBBRR, ebb_regs[1]);
+ mtspr(SPRN_BESCR, ebb_regs[2]);
+ mtspr(SPRN_TAR, user_tar);
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ }
+ mtspr(SPRN_VRSAVE, user_vrsave);
+
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);
@@ -3317,7 +3641,7 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
- get_online_cpus();
+ cpus_read_lock();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
@@ -3339,17 +3663,17 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
- put_online_cpus();
+ cpus_read_unlock();
kfree(ops->rm_core);
kfree(ops);
return;
}
- cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
- "ppc/kvm_book3s:prepare",
- kvmppc_set_host_core,
- kvmppc_clear_host_core);
- put_online_cpus();
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
+ cpus_read_unlock();
}
void kvmppc_free_host_rm_ops(void)
@@ -3468,6 +3792,19 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm_hv_vm_activated();
/*
+ * Initialize smt_mode depending on processor.
+ * POWER8 and earlier have to use "strict" threading, where
+ * all vCPUs in a vcore have to run on the same (sub)core,
+ * whereas on POWER9 the threads can each run a different
+ * guest.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.smt_mode = threads_per_subcore;
+ else
+ kvm->arch.smt_mode = 1;
+ kvm->arch.emul_smt_mode = 1;
+
+ /*
* Create a debugfs directory for the VM
*/
snprintf(buf, sizeof(buf), "vm%d", current->pid);
@@ -3896,6 +4233,7 @@ static struct kvmppc_ops kvm_ops_hv = {
#endif
.configure_mmu = kvmhv_configure_mmu,
.get_rmmu_info = kvmhv_get_rmmu_info,
+ .set_smt_mode = kvmhv_set_smt_mode,
};
static int kvm_init_subcore_bitmap(void)
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ee4c2558c305..90644db9d38e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -307,7 +307,7 @@ void kvmhv_commence_exit(int trap)
return;
for (i = 0; i < MAX_SUBCORES; ++i) {
- vc = sip->master_vcs[i];
+ vc = sip->vc[i];
if (!vc)
break;
do {
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 0fdc4a28970b..dc54373c8780 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -61,13 +61,6 @@ BEGIN_FTR_SECTION
std r3, HSTATE_DABR(r13)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
- /* Hard-disable interrupts */
- mfmsr r10
- std r10, HSTATE_HOST_MSR(r13)
- rldicl r10,r10,48,1
- rotldi r10,r10,16
- mtmsrd r10,1
-
/* Save host PMU registers */
BEGIN_FTR_SECTION
/* Work around P8 PMAE bug */
@@ -121,10 +114,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* Put whatever is in the decrementer into the
* hypervisor decrementer.
*/
+BEGIN_FTR_SECTION
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_KVM(r5)
+ ld r9, KVM_HOST_LPCR(r6)
+ andis. r9, r9, LPCR_LD@h
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mfspr r8,SPRN_DEC
mftb r7
- mtspr SPRN_HDEC,r8
+BEGIN_FTR_SECTION
+ /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
+ bne 32f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
+32: mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
@@ -143,6 +146,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
*
* R1 = host R1
* R2 = host R2
+ * R3 = trap number on this thread
* R12 = exit handler id
* R13 = PACA
*/
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 7ef0993214f3..c356f9a40b24 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -130,12 +130,28 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
out:
/*
+ * For guest that supports FWNMI capability, hook the MCE event into
+ * vcpu structure. We are going to exit the guest with KVM_EXIT_NMI
+ * exit reason. On our way to exit we will pull this event from vcpu
+ * structure and print it from thread 0 of the core/subcore.
+ *
+ * For guest that does not support FWNMI capability (old QEMU):
* We are now going enter guest either through machine check
* interrupt (for unhandled errors) or will continue from
* current HSRR0 (for handled errors) in guest. Hence
* queue up the event so that we can log it from host console later.
*/
- machine_check_queue_event();
+ if (vcpu->kvm->arch.fwnmi_enabled) {
+ /*
+ * Hook up the mce event on to vcpu structure.
+ * First clear the old event.
+ */
+ memset(&vcpu->arch.mce_evt, 0, sizeof(vcpu->arch.mce_evt));
+ if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+ vcpu->arch.mce_evt = mce_evt;
+ }
+ } else
+ machine_check_queue_event();
return handled;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index ce6f2121fffe..584c74c8119f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -15,6 +15,7 @@
#include <linux/log2.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/book3s/64/mmu-hash.h>
@@ -443,17 +444,23 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i)
+ for (i = 0; i < npages; ++i) {
asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
+ trace_tlbie(kvm->arch.lpid, 0, rbvalues[i],
+ kvm->arch.lpid, 0, 0, 0);
+ }
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
if (need_sync)
asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i)
+ for (i = 0; i < npages; ++i) {
asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (0));
+ trace_tlbie(kvm->arch.lpid, 1, rbvalues[i],
+ 0, 0, 0, 0);
+ }
asm volatile("ptesync" : : : "memory");
}
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bdb3f76ceb6b..cb44065e2946 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -32,12 +32,30 @@
#include <asm/opal.h>
#include <asm/xive-regs.h>
+/* Sign-extend HDEC if not on POWER9 */
+#define EXTEND_HDEC(reg) \
+BEGIN_FTR_SECTION; \
+ extsw reg, reg; \
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS 160
+#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_TID (SFS-16)
+#define STACK_SLOT_PSSCR (SFS-24)
+#define STACK_SLOT_PID (SFS-32)
+#define STACK_SLOT_IAMR (SFS-40)
+#define STACK_SLOT_CIABR (SFS-48)
+#define STACK_SLOT_DAWR (SFS-56)
+#define STACK_SLOT_DAWRX (SFS-64)
+#define STACK_SLOT_HFSCR (SFS-72)
+
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
@@ -51,6 +69,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
@@ -135,20 +154,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stb r0, HSTATE_HWTHREAD_REQ(r13)
/*
- * For external and machine check interrupts, we need
- * to call the Linux handler to process the interrupt.
- * We do that by jumping to absolute address 0x500 for
- * external interrupts, or the machine_check_fwnmi label
- * for machine checks (since firmware might have patched
- * the vector area at 0x200). The [h]rfid at the end of the
- * handler will return to the book3s_hv_interrupts.S code.
- * For other interrupts we do the rfid to get back
- * to the book3s_hv_interrupts.S code here.
+ * For external interrupts we need to call the Linux
+ * handler to process the interrupt. We do that by jumping
+ * to absolute address 0x500 for external interrupts.
+ * The [h]rfid at the end of the handler will return to
+ * the book3s_hv_interrupts.S code. For other interrupts
+ * we do the rfid to get back to the book3s_hv_interrupts.S
+ * code here.
*/
ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r7, HSTATE_HOST_MSR(r13)
+ /* Return the trap number on this thread as the return value */
+ mr r3, r12
+
/*
* If we came back from the guest via a relocation-on interrupt,
* we will be in virtual mode at this point, which makes it a
@@ -158,62 +178,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
- cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq 11f
- cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
- beq 15f /* Invoke the H_DOORBELL handler */
- cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
- beq cr2, 14f /* HMI check */
-
- /* RFI into the highmem handler, or branch to interrupt handler */
+ /* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beq cr1, 13f /* machine check */
RFI
- /* On POWER7, we have external interrupts set to use HSRR0/1 */
-11: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0x500
-
-13: b machine_check_fwnmi
-
-14: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- b hmi_exception_after_realmode
-
-15: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0xe80
-
- /* Virtual-mode return - can't get here for HMI or machine check */
+ /* Virtual-mode return */
.Lvirt_return:
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- beq 16f
- cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
- beq 17f
- andi. r0, r7, MSR_EE /* were interrupts hard-enabled? */
- beq 18f
- mtmsrd r7, 1 /* if so then re-enable them */
-18: mtlr r8
+ mtlr r8
blr
-16: mtspr SPRN_HSRR0, r8 /* jump to reloc-on external vector */
- mtspr SPRN_HSRR1, r7
- b exc_virt_0x4500_hardware_interrupt
-
-17: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- b exc_virt_0x4e80_h_doorbell
-
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+ /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+ /* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
@@ -295,8 +278,9 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
+ EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
- cmpwi r0, 0
+ cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@@ -319,25 +303,31 @@ kvm_novcpu_exit:
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
- stw r12, 112-4(r1)
+ stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
- lwz r12, 112-4(r1)
+ lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
* We come in here when wakened from nap mode.
* Relocation is off and most register values are lost.
* r13 points to the PACA.
+ * r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
.globl kvm_start_guest
kvm_start_guest:
-
/* Set runlatch bit the minute you wake up from nap */
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
mtspr SPRN_CTRLT, r0
+ /*
+ * Could avoid this and pass it through in r3. For now,
+ * code expects it to be in SRR1.
+ */
+ mtspr SPRN_SRR1,r3
+
ld r2,PACATOC(r13)
li r0,KVM_HWTHREAD_IN_KVM
@@ -390,8 +380,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
- lis r6, 0x7fff
- ori r6, r6, 0xffff
+ LOAD_REG_ADDR(r6, decrementer_max)
+ ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
@@ -456,13 +446,15 @@ kvm_no_guest:
/*
* We jump to pnv_wakeup_loss, which will return to the caller
* of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap.
+ * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
+ * requires SRR1 in r12.
*/
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
li r3, 0
+ mfspr r12,SPRN_SRR1
b pnv_wakeup_loss
53: HMT_LOW
@@ -545,11 +537,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
-/* Stack frame offsets */
-#define STACK_SLOT_TID (112-16)
-#define STACK_SLOT_PSSCR (112-24)
-#define STACK_SLOT_PID (112-32)
-
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -565,7 +552,7 @@ kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -112(r1)
+ stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
@@ -749,10 +736,22 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
+ mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
+ std r8, STACK_SLOT_IAMR(r1)
+ mfspr r5, SPRN_HFSCR
+ std r5, STACK_SLOT_HFSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_CIABR
+ mfspr r6, SPRN_DAWR
+ mfspr r7, SPRN_DAWRX
+ std r5, STACK_SLOT_CIABR(r1)
+ std r6, STACK_SLOT_DAWR(r1)
+ std r7, STACK_SLOT_DAWRX(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
@@ -895,8 +894,10 @@ FTR_SECTION_ELSE
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
+ ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
+ mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
@@ -911,7 +912,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
- stw r3,VCPU_DEC(r4)
+ std r3,VCPU_DEC(r4)
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
@@ -968,7 +969,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
- cmpwi r3, 512 /* 1 microsecond */
+ EXTEND_HDEC(r3)
+ cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
@@ -1022,7 +1024,13 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
li r0, BOOK3S_INTERRUPT_EXTERNAL
bne cr1, 12f
mfspr r0, SPRN_DEC
- cmpwi r0, 0
+BEGIN_FTR_SECTION
+ /* On POWER9 check whether the guest has large decrementer enabled */
+ andis. r8, r8, LPCR_LD@h
+ bne 15f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ extsw r0, r0
+15: cmpdi r0, 0
li r0, BOOK3S_INTERRUPT_DECREMENTER
bge 5f
@@ -1032,6 +1040,23 @@ kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
mr r9, r4
bl kvmppc_msr_interrupt
5:
+BEGIN_FTR_SECTION
+ b fast_guest_return
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+ /* On POWER9, check for pending doorbell requests */
+ lbz r0, VCPU_DBELL_REQ(r4)
+ cmpwi r0, 0
+ beq fast_guest_return
+ ld r5, HSTATE_KVM_VCORE(r13)
+ /* Set DPDES register so the CPU will take a doorbell interrupt */
+ li r0, 1
+ mtspr SPRN_DPDES, r0
+ std r0, VCORE_DPDES(r5)
+ /* Make sure other cpus see vcore->dpdes set before dbell req clear */
+ lwsync
+ /* Clear the pending doorbell request */
+ li r0, 0
+ stb r0, VCPU_DBELL_REQ(r4)
/*
* Required state:
@@ -1206,6 +1231,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
+ /*
+ * Now that we have saved away SRR0/1 and HSRR0/1,
+ * interrupts are recoverable in principle, so set MSR_RI.
+ * This becomes important for relocation-on interrupts from
+ * the guest, which we can get in radix mode on POWER9.
+ */
+ li r0, MSR_RI
+ mtmsrd r0, 1
+
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r9, VCPU_TB_RMINTR
mr r4, r9
@@ -1262,6 +1296,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
beq 4f
b guest_exit_cont
3:
+ /* If it's a hypervisor facility unavailable interrupt, save HFSCR */
+ cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
+ bne 14f
+ mfspr r3, SPRN_HFSCR
+ std r3, VCPU_HFSCR(r9)
+ b guest_exit_cont
+14:
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ guest_exit_cont
@@ -1449,12 +1490,18 @@ mc_cont:
mtspr SPRN_SPURR,r4
/* Save DEC */
+ ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
+ /* On P9, if the guest has large decr enabled, don't sign extend */
+BEGIN_FTR_SECTION
+ ld r4, VCORE_LPCR(r3)
+ andis. r4, r4, LPCR_LD@h
+ bne 16f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
- add r5,r5,r6
+16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
- ld r3,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_TB_OFFSET(r3)
subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
@@ -1499,17 +1546,19 @@ FTR_SECTION_ELSE
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
rotldi r6, r6, 60
std r6, VCPU_PSSCR(r9)
+ /* Restore host HFSCR value */
+ ld r7, STACK_SLOT_HFSCR(r1)
+ mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
*/
li r0, 0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX, r0
+ mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
+ mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
@@ -1525,6 +1574,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
+ mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
@@ -1670,12 +1720,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Restore host values of some registers */
BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_CIABR(r1)
+ ld r6, STACK_SLOT_DAWR(r1)
+ ld r7, STACK_SLOT_DAWRX(r1)
+ mtspr SPRN_CIABR, r5
+ mtspr SPRN_DAWR, r6
+ mtspr SPRN_DAWRX, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
+ ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
+ mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
@@ -1819,8 +1879,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- ld r0, 112+PPC_LR_STKOFF(r1)
- addi r1, r1, 112
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
mtlr r0
blr
@@ -2366,12 +2426,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
- cmpw r3, r4
+BEGIN_FTR_SECTION
+ /* On P9 check whether the guest has large decrementer mode enabled */
+ ld r6, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_LPCR(r6)
+ andis. r6, r6, LPCR_LD@h
+ bne 68f
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+ extsw r3, r3
+68: EXTEND_HDEC(r4)
+ cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
- extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)
@@ -2552,22 +2620,32 @@ machine_check_realmode:
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
- * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
- * machine check interrupt (set HSRR0 to 0x200). And for handled
- * errors (no-fatal), just go back to guest execution with current
- * HSRR0 instead of exiting guest. This new approach will inject
- * machine check to guest for fatal error causing guest to crash.
- *
- * The old code used to return to host for unhandled errors which
- * was causing guest to hang with soft lockups inside guest and
- * makes it difficult to recover guest instance.
+ * For the guest that is FWNMI capable, deliver all the MCE errors
+ * (handled/unhandled) by exiting the guest with KVM_EXIT_NMI exit
+ * reason. This new approach injects machine check errors in guest
+ * address space to guest with additional information in the form
+ * of RTAS event, thus enabling guest kernel to suitably handle
+ * such errors.
*
+ * For the guest that is not FWNMI capable (old QEMU) fallback
+ * to old behaviour for backward compatibility:
+ * Deliver unhandled/fatal (e.g. UE) MCE errors to guest either
+ * through machine check interrupt (set HSRR0 to 0x200).
+ * For handled errors (no-fatal), just go back to guest execution
+ * with current HSRR0.
* if we receive machine check with MSR(RI=0) then deliver it to
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
bne mc_cont /* if so, exit to host */
+ /* Check if guest is capable of handling NMI exit */
+ ld r10, VCPU_KVM(r9)
+ lbz r10, KVM_FWNMI(r10)
+ cmpdi r10, 1 /* FWNMI capable? */
+ beq mc_cont /* if so, exit with KVM_EXIT_NMI. */
+
+ /* if not, fall through for backward compatibility. */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index ffe1da95033a..08b200a0bbce 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1257,8 +1257,8 @@ static void xive_pre_save_scan(struct kvmppc_xive *xive)
if (!xc)
continue;
for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
- if (xc->queues[i].qpage)
- xive_pre_save_queue(xive, &xc->queues[i]);
+ if (xc->queues[j].qpage)
+ xive_pre_save_queue(xive, &xc->queues[j]);
}
}
diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c
index 023a31133c37..4636ca6e7d38 100644
--- a/arch/powerpc/kvm/book3s_xive_template.c
+++ b/arch/powerpc/kvm/book3s_xive_template.c
@@ -69,7 +69,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
{
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
- __x_writeq(0, __x_eoi_page(xd));
+ __x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
opal_int_eoi(hw_irq);
} else {
@@ -89,7 +89,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
* properly.
*/
if (xd->flags & XIVE_IRQ_FLAG_LSI)
- __x_readq(__x_eoi_page(xd));
+ __x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
else {
eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3eaac3809977..071b87ee682f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
kvmppc_core_check_exceptions(vcpu);
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Exception delivery raised request; start over */
return 1;
}
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index c873ffe55362..4d8b4d6cebff 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -39,7 +39,7 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
unsigned long dec_nsec;
unsigned long long dec_time;
- pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+ pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
#ifdef CONFIG_PPC_BOOK3S
@@ -109,7 +109,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_TBWU: break;
case SPRN_DEC:
- vcpu->arch.dec = spr_val;
+ vcpu->arch.dec = (u32) spr_val;
kvmppc_emulate_dec(vcpu);
break;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 7f71ab5fcad1..1a75c0b5f4ca 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !!(v->arch.pending_exceptions) ||
- v->requests;
+ return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
*/
smp_mb();
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Make sure we process requests preemptable */
local_irq_enable();
trace_kvm_check_requests(vcpu);
@@ -554,13 +553,28 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
r = 0;
- if (hv_enabled) {
+ if (kvm) {
+ if (kvm->arch.emul_smt_mode > 1)
+ r = kvm->arch.emul_smt_mode;
+ else
+ r = kvm->arch.smt_mode;
+ } else if (hv_enabled) {
if (cpu_has_feature(CPU_FTR_ARCH_300))
r = 1;
else
r = threads_per_subcore;
}
break;
+ case KVM_CAP_PPC_SMT_POSSIBLE:
+ r = 1;
+ if (hv_enabled) {
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ r = ((threads_per_subcore << 1) - 1);
+ else
+ /* P9 can emulate dbells, so allow any mode */
+ r = 8 | 4 | 2 | 1;
+ }
+ break;
case KVM_CAP_PPC_RMA:
r = 0;
break;
@@ -619,6 +633,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !!hv_enabled && !cpu_has_feature(CPU_FTR_ARCH_300);
break;
#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_FWNMI:
+ r = hv_enabled;
+ break;
+#endif
case KVM_CAP_PPC_HTM:
r = cpu_has_feature(CPU_FTR_TM_COMP) &&
is_kvmppc_hv_enabled(kvm);
@@ -1538,6 +1557,15 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
break;
}
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_FWNMI:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(vcpu->kvm))
+ break;
+ r = 0;
+ vcpu->kvm->arch.fwnmi_enabled = true;
+ break;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
default:
r = -EINVAL;
break;
@@ -1712,6 +1740,15 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
break;
}
+ case KVM_CAP_PPC_SMT: {
+ unsigned long mode = cap->args[0];
+ unsigned long flags = cap->args[1];
+
+ r = -EINVAL;
+ if (kvm->arch.kvm_ops->set_smt_mode)
+ r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
+ break;
+ }
#endif
default:
r = -EINVAL;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index ed7dfce331e0..3c3146ba62da 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -9,10 +9,17 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
-obj-y += string.o alloc.o crtsavres.o code-patching.o \
- feature-fixups.o
+obj-y += string.o alloc.o code-patching.o feature-fixups.o
-obj-$(CONFIG_PPC32) += div64.o copy_32.o
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
+
+# See corresponding test in arch/powerpc/Makefile
+# 64-bit linker creates .sfpr on demand for final link (vmlinux),
+# so it is only needed for modules, and only for older linkers which
+# do not support --save-restore-funcs
+ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
+extra-$(CONFIG_PPC64) += crtsavres.o
+endif
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
@@ -30,7 +37,7 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
-obj-$(CONFIG_ALTIVEC) += xor_vmx.o
+obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 500b0f6a0b64..c9de03e0c1f1 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -12,23 +12,186 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/mm.h>
-#include <asm/page.h>
-#include <asm/code-patching.h>
+#include <linux/cpuhotplug.h>
+#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include <asm/code-patching.h>
-int patch_instruction(unsigned int *addr, unsigned int instr)
+static int __patch_instruction(unsigned int *addr, unsigned int instr)
{
int err;
__put_user_size(instr, addr, 4, err);
if (err)
return err;
- asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
+
+ asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
+
+ return 0;
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+
+static int text_area_cpu_up(unsigned int cpu)
+{
+ struct vm_struct *area;
+
+ area = get_vm_area(PAGE_SIZE, VM_ALLOC);
+ if (!area) {
+ WARN_ONCE(1, "Failed to create text area for cpu %d\n",
+ cpu);
+ return -1;
+ }
+ this_cpu_write(text_poke_area, area);
+
+ return 0;
+}
+
+static int text_area_cpu_down(unsigned int cpu)
+{
+ free_vm_area(this_cpu_read(text_poke_area));
+ return 0;
+}
+
+/*
+ * Run as a late init call. This allows all the boot time patching to be done
+ * simply by patching the code, and then we're called here prior to
+ * mark_rodata_ro(), which happens after all init calls are run. Although
+ * BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge
+ * it as being preferable to a kernel that will crash later when someone tries
+ * to use patch_instruction().
+ */
+static int __init setup_text_poke_area(void)
+{
+ BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online", text_area_cpu_up,
+ text_area_cpu_down));
+
+ return 0;
+}
+late_initcall(setup_text_poke_area);
+
+/*
+ * This can be called for kernel text or a module.
+ */
+static int map_patch_area(void *addr, unsigned long text_poke_addr)
+{
+ unsigned long pfn;
+ int err;
+
+ if (is_vmalloc_addr(addr))
+ pfn = vmalloc_to_pfn(addr);
+ else
+ pfn = __pa_symbol(addr) >> PAGE_SHIFT;
+
+ err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT),
+ pgprot_val(PAGE_KERNEL));
+
+ pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
+ if (err)
+ return -1;
+
return 0;
}
+static inline int unmap_patch_area(unsigned long addr)
+{
+ pte_t *ptep;
+ pmd_t *pmdp;
+ pud_t *pudp;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset_k(addr);
+ if (unlikely(!pgdp))
+ return -EINVAL;
+
+ pudp = pud_offset(pgdp, addr);
+ if (unlikely(!pudp))
+ return -EINVAL;
+
+ pmdp = pmd_offset(pudp, addr);
+ if (unlikely(!pmdp))
+ return -EINVAL;
+
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (unlikely(!ptep))
+ return -EINVAL;
+
+ pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr);
+
+ /*
+ * In hash, pte_clear flushes the tlb, in radix, we have to
+ */
+ pte_clear(&init_mm, addr, ptep);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ return 0;
+}
+
+int patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ int err;
+ unsigned int *dest = NULL;
+ unsigned long flags;
+ unsigned long text_poke_addr;
+ unsigned long kaddr = (unsigned long)addr;
+
+ /*
+ * During early early boot patch_instruction is called
+ * when text_poke_area is not ready, but we still need
+ * to allow patching. We just do the plain old patching
+ * We use slab_is_available and per cpu read * via this_cpu_read
+ * of text_poke_area. Per-CPU areas might not be up early
+ * this can create problems with just using this_cpu_read()
+ */
+ if (!slab_is_available() || !this_cpu_read(text_poke_area))
+ return __patch_instruction(addr, instr);
+
+ local_irq_save(flags);
+
+ text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
+ if (map_patch_area(addr, text_poke_addr)) {
+ err = -1;
+ goto out;
+ }
+
+ dest = (unsigned int *)(text_poke_addr) +
+ ((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
+
+ /*
+ * We use __put_user_size so that we can handle faults while
+ * writing to dest and return err to handle faults gracefully
+ */
+ __put_user_size(instr, dest, 4, err);
+ if (!err)
+ asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync"
+ ::"r" (dest), "r"(addr));
+
+ err = unmap_patch_area(text_poke_addr);
+ if (err)
+ pr_warn("failed to unmap %lx\n", text_poke_addr);
+
+out:
+ local_irq_restore(flags);
+
+ return err;
+}
+#else /* !CONFIG_STRICT_KERNEL_RWX */
+
+int patch_instruction(unsigned int *addr, unsigned int instr)
+{
+ return __patch_instruction(addr, instr);
+}
+
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+NOKPROBE_SYMBOL(patch_instruction);
+
int patch_branch(unsigned int *addr, unsigned long target, int flags)
{
return patch_instruction(addr, create_branch(addr, target, flags));
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index a24b4039352c..706b7cc19846 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -82,14 +82,14 @@
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
cmpldi r5,16
- cmpldi cr1,r5,4096
+ cmpldi cr1,r5,3328
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
blt .Lshort_copy
- bgt cr1,.Lvmx_copy
+ bge cr1,.Lvmx_copy
#else
cmpldi r5,16
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
index 18af0b3d3eb2..7e5e1c28e56a 100644
--- a/arch/powerpc/lib/crtsavres.S
+++ b/arch/powerpc/lib/crtsavres.S
@@ -44,10 +44,10 @@
#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#ifndef CONFIG_PPC64
-
.section ".text"
+#ifndef CONFIG_PPC64
+
/* Routines for saving integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer save area. */
@@ -314,8 +314,6 @@ _GLOBAL(_restvr_31)
#else /* CONFIG_PPC64 */
- .section ".text.save.restore","ax",@progbits
-
.globl _savegpr0_14
_savegpr0_14:
std r14,-144(r1)
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index f9de69a04e88..4df240aa5f81 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -29,10 +29,7 @@
#define vector __attribute__((vector_size(16)))
#endif
-#include <linux/preempt.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <asm/switch_to.h>
+#include "xor_vmx.h"
typedef vector signed char unative_t;
@@ -64,16 +61,13 @@ typedef vector signed char unative_t;
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in)
{
DEFINE(v1);
DEFINE(v2);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -83,23 +77,16 @@ void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
v1 += 4;
v2 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_2);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in)
{
DEFINE(v1);
DEFINE(v2);
DEFINE(v3);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -112,15 +99,11 @@ void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
v2 += 4;
v3 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_3);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -128,9 +111,6 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
DEFINE(v4);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -146,15 +126,11 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
v3 += 4;
v4 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_4);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -163,9 +139,6 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
DEFINE(v5);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -184,8 +157,4 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
v4 += 4;
v5 += 4;
} while (--lines > 0);
-
- disable_kernel_altivec();
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_5);
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
new file mode 100644
index 000000000000..4746708451ae
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -0,0 +1,20 @@
+/*
+ * Simple interface to link xor_vmx.c and xor_vmx_glue.c
+ *
+ * Separating these file ensures that no altivec instructions are run
+ * outside of the enable/disable altivec block.
+ */
+
+void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in);
+
+void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in);
+
+void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in);
+
+void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
new file mode 100644
index 000000000000..6521fe5e8cef
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -0,0 +1,62 @@
+/*
+ * Altivec XOR operations
+ *
+ * Copyright 2017 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+#include "xor_vmx.h"
+
+void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_2(bytes, v1_in, v2_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_3(bytes, v1_in, v2_in, v3_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
+ unsigned long *v2_in, unsigned long *v3_in,
+ unsigned long *v4_in, unsigned long *v5_in)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 6c5025e81236..f4c6472f2fc4 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -88,7 +88,7 @@ static void mmu_mapin_immr(void)
int offset;
for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
- map_page(v + offset, p + offset, f);
+ map_kernel_page(v + offset, p + offset, f);
}
/* Address of instructions to patch */
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 2dc74e5c6458..382528475433 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -227,7 +227,7 @@ __dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t
do {
SetPageReserved(page);
- map_page(vaddr, page_to_phys(page),
+ map_kernel_page(vaddr, page_to_phys(page),
pgprot_val(pgprot_noncached(PAGE_KERNEL)));
page++;
vaddr += PAGE_SIZE;
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c
index c6b900f54c07..b1c144b03fcf 100644
--- a/arch/powerpc/mm/dump_hashpagetable.c
+++ b/arch/powerpc/mm/dump_hashpagetable.c
@@ -335,7 +335,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
unsigned long rpn, lp_bits;
int base_psize = 0, actual_psize = 0;
- if (ea <= PAGE_OFFSET)
+ if (ea < PAGE_OFFSET)
return -1;
/* Look in primary table */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 3a7d580fdc59..4c422632047b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -206,6 +206,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
int is_write = 0;
int trap = TRAP(regs);
int is_exec = trap == 0x400;
+ int is_user = user_mode(regs);
int fault;
int rc = 0, store_update_sp = 0;
@@ -216,7 +217,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* bits we are interested in. But there are some bits which
* indicate errors in DSISR but can validly be set in SRR1.
*/
- if (trap == 0x400)
+ if (is_exec)
error_code &= 0x48200000;
else
is_write = error_code & DSISR_ISSTORE;
@@ -247,13 +248,13 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* The kernel should never take an execute fault nor should it
* take a page fault to a kernel address.
*/
- if (!user_mode(regs) && (is_exec || (address >= TASK_SIZE))) {
+ if (!is_user && (is_exec || (address >= TASK_SIZE))) {
rc = SIGSEGV;
goto bail;
}
#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
- defined(CONFIG_PPC_BOOK3S_64))
+ defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx))
if (error_code & DSISR_DABRMATCH) {
/* breakpoint match */
do_break(regs, address, error_code);
@@ -266,7 +267,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
local_irq_enable();
if (faulthandler_disabled() || mm == NULL) {
- if (!user_mode(regs)) {
+ if (!is_user) {
rc = SIGSEGV;
goto bail;
}
@@ -287,10 +288,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* can result in fault, which will cause a deadlock when called with
* mmap_sem held
*/
- if (!is_exec && user_mode(regs))
+ if (is_write && is_user)
store_update_sp = store_updates_sp(regs);
- if (user_mode(regs))
+ if (is_user)
flags |= FAULT_FLAG_USER;
/* When running in the kernel we expect faults to occur only to
@@ -309,7 +310,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
* thus avoiding the deadlock.
*/
if (!down_read_trylock(&mm->mmap_sem)) {
- if (!user_mode(regs) && !search_exception_tables(regs->nip))
+ if (!is_user && !search_exception_tables(regs->nip))
goto bad_area_nosemaphore;
retry:
@@ -509,7 +510,7 @@ bad_area:
bad_area_nosemaphore:
/* User mode accesses cause a SIGSEGV */
- if (user_mode(regs)) {
+ if (is_user) {
_exception(SIGSEGV, regs, code, address);
goto bail;
}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 65bb8f33b399..3848af167df9 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
+#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>
@@ -23,6 +24,7 @@
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
@@ -98,6 +100,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
+ trace_tlbie(0, 0, va, 0, 0, 0, 0);
}
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -147,6 +150,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
+ trace_tlbie(0, 1, va, 0, 0, 0, 0);
}
@@ -181,8 +185,10 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
+ spin_begin();
while(test_bit(HPTE_LOCK_BIT, word))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
}
@@ -407,6 +413,38 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
tlbie(vpn, psize, psize, ssize, 0);
}
+/*
+ * Remove a bolted kernel entry. Memory hotplug uses this.
+ *
+ * No need to lock here because we should be the only user.
+ */
+static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
+{
+ unsigned long vpn;
+ unsigned long vsid;
+ long slot;
+ struct hash_pte *hptep;
+
+ vsid = get_kernel_vsid(ea, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
+
+ slot = native_hpte_find(vpn, psize, ssize);
+ if (slot == -1)
+ return -ENOENT;
+
+ hptep = htab_address + slot;
+
+ VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));
+
+ /* Invalidate the hpte */
+ hptep->v = 0;
+
+ /* Invalidate the TLB */
+ tlbie(vpn, psize, psize, ssize, 0);
+ return 0;
+}
+
+
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
int bpsize, int apsize, int ssize, int local)
{
@@ -725,6 +763,7 @@ void __init hpte_init_native(void)
mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+ mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
mmu_hash_ops.hpte_insert = native_hpte_insert;
mmu_hash_ops.hpte_remove = native_hpte_remove;
mmu_hash_ops.hpte_clear_all = native_hpte_clear;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index f2095ce9d4b0..7a20669c19e7 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -810,6 +810,8 @@ static void update_hid_for_hash(void)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ trace_tlbie(0, 0, rb, 0, 2, 0, 0);
+
/*
* now switch the HID
*/
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 6575b9aabef4..a12e86395025 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a4f33de4008e..e1bf5ca397fe 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -17,6 +17,8 @@
#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <linux/moduleparam.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -32,6 +34,7 @@
#define PAGE_SHIFT_16G 34
unsigned int HPAGE_SHIFT;
+EXPORT_SYMBOL(HPAGE_SHIFT);
/*
* Tracks gpages after the device tree is scanned and before the
@@ -55,7 +58,7 @@ static unsigned nr_gpages;
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
/* Only called for hugetlbfs pages, hence can ignore THP */
return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL, NULL);
@@ -77,7 +80,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
num_hugepd = 1;
}
- new = kmem_cache_zalloc(cachep, GFP_KERNEL);
+ new = kmem_cache_zalloc(cachep, pgtable_gfp_flags(mm, GFP_KERNEL));
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -617,62 +620,39 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
} while (addr = next, addr != end);
}
-/*
- * We are holding mmap_sem, so a parallel huge page collapse cannot run.
- * To prevent hugepage split, disable irq.
- */
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
+struct page *follow_huge_pd(struct vm_area_struct *vma,
+ unsigned long address, hugepd_t hpd,
+ int flags, int pdshift)
{
- bool is_thp;
- pte_t *ptep, pte;
- unsigned shift;
- unsigned long mask, flags;
- struct page *page = ERR_PTR(-EINVAL);
-
- local_irq_save(flags);
- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &is_thp, &shift);
- if (!ptep)
- goto no_page;
- pte = READ_ONCE(*ptep);
- /*
- * Verify it is a huge page else bail.
- * Transparent hugepages are handled by generic code. We can skip them
- * here.
- */
- if (!shift || is_thp)
- goto no_page;
-
- if (!pte_present(pte)) {
- page = NULL;
- goto no_page;
+ pte_t *ptep;
+ spinlock_t *ptl;
+ struct page *page = NULL;
+ unsigned long mask;
+ int shift = hugepd_shift(hpd);
+ struct mm_struct *mm = vma->vm_mm;
+
+retry:
+ ptl = &mm->page_table_lock;
+ spin_lock(ptl);
+
+ ptep = hugepte_offset(hpd, address, pdshift);
+ if (pte_present(*ptep)) {
+ mask = (1UL << shift) - 1;
+ page = pte_page(*ptep);
+ page += ((address & mask) >> PAGE_SHIFT);
+ if (flags & FOLL_GET)
+ get_page(page);
+ } else {
+ if (is_hugetlb_entry_migration(*ptep)) {
+ spin_unlock(ptl);
+ __migration_entry_wait(mm, ptep, ptl);
+ goto retry;
+ }
}
- mask = (1UL << shift) - 1;
- page = pte_page(pte);
- if (page)
- page += (address & mask) / PAGE_SIZE;
-
-no_page:
- local_irq_restore(flags);
+ spin_unlock(ptl);
return page;
}
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- BUG();
- return NULL;
-}
-
-struct page *
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
- pud_t *pud, int write)
-{
- BUG();
- return NULL;
-}
-
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
unsigned long sz)
{
@@ -763,8 +743,11 @@ static int __init add_huge_page_size(unsigned long long size)
* Hash: 16M and 16G
*/
if (radix_enabled()) {
- if (mmu_psize != MMU_PAGE_2M)
- return -EINVAL;
+ if (mmu_psize != MMU_PAGE_2M) {
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1) ||
+ (mmu_psize != MMU_PAGE_1G))
+ return -EINVAL;
+ }
} else {
if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
return -EINVAL;
@@ -963,7 +946,7 @@ pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
if (pmd_none(pmd))
return NULL;
- if (pmd_trans_huge(pmd)) {
+ if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
if (is_thp)
*is_thp = true;
ret_pte = (pte_t *) pmdp;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index ec84b31c6c86..5b4c25d12ff3 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -44,6 +44,7 @@
#include <linux/slab.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <linux/memremap.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -110,8 +111,29 @@ static int __meminit vmemmap_populated(unsigned long start, int page_size)
return 0;
}
+/*
+ * vmemmap virtual address space management does not have a traditonal page
+ * table to track which virtual struct pages are backed by physical mapping.
+ * The virtual to physical mappings are tracked in a simple linked list
+ * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
+ * all times where as the 'next' list maintains the available
+ * vmemmap_backing structures which have been deleted from the
+ * 'vmemmap_global' list during system runtime (memory hotplug remove
+ * operation). The freed 'vmemmap_backing' structures are reused later when
+ * new requests come in without allocating fresh memory. This pointer also
+ * tracks the allocated 'vmemmap_backing' structures as we allocate one
+ * full page memory at a time when we dont have any.
+ */
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;
+
+/*
+ * The same pointer 'next' tracks individual chunks inside the allocated
+ * full page during the boot time and again tracks the freeed nodes during
+ * runtime. It is racy but it does not happen as they are separated by the
+ * boot process. Will create problem if some how we have memory hotplug
+ * operation during boot !!
+ */
static int num_left;
static int num_freed;
@@ -171,13 +193,17 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
+ struct vmem_altmap *altmap;
void *p;
int rc;
if (vmemmap_populated(start, page_size))
continue;
- p = vmemmap_alloc_block(page_size, node);
+ /* altmap lookups only work at section boundaries */
+ altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
+
+ p = __vmemmap_alloc_block_buf(page_size, node, altmap);
if (!p)
return -ENOMEM;
@@ -234,13 +260,17 @@ static unsigned long vmemmap_list_free(unsigned long start)
void __ref vmemmap_free(unsigned long start, unsigned long end)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
+ unsigned long page_order = get_order(page_size);
start = _ALIGN_DOWN(start, page_size);
pr_debug("vmemmap_free %lx...%lx\n", start, end);
for (; start < end; start += page_size) {
- unsigned long addr;
+ unsigned long nr_pages, addr;
+ struct vmem_altmap *altmap;
+ struct page *section_base;
+ struct page *page;
/*
* the section has already be marked as invalid, so
@@ -251,29 +281,33 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
continue;
addr = vmemmap_list_free(start);
- if (addr) {
- struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
-
- if (PageReserved(page)) {
- /* allocated from bootmem */
- if (page_size < PAGE_SIZE) {
- /*
- * this shouldn't happen, but if it is
- * the case, leave the memory there
- */
- WARN_ON_ONCE(1);
- } else {
- unsigned int nr_pages =
- 1 << get_order(page_size);
- while (nr_pages--)
- free_reserved_page(page++);
- }
- } else
- free_pages((unsigned long)(__va(addr)),
- get_order(page_size));
-
- vmemmap_remove_mapping(start, page_size);
+ if (!addr)
+ continue;
+
+ page = pfn_to_page(addr >> PAGE_SHIFT);
+ section_base = pfn_to_page(vmemmap_section_start(start));
+ nr_pages = 1 << page_order;
+
+ altmap = to_vmem_altmap((unsigned long) section_base);
+ if (altmap) {
+ vmem_altmap_free(altmap, nr_pages);
+ } else if (PageReserved(page)) {
+ /* allocated from bootmem */
+ if (page_size < PAGE_SIZE) {
+ /*
+ * this shouldn't happen, but if it is
+ * the case, leave the memory there
+ */
+ WARN_ON_ONCE(1);
+ } else {
+ while (nr_pages--)
+ free_reserved_page(page++);
+ }
+ } else {
+ free_pages((unsigned long)(__va(addr)), page_order);
}
+
+ vmemmap_remove_mapping(start, page_size);
}
}
#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9ee536ec0739..8541f18694a4 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -36,6 +36,7 @@
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/memremap.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -126,18 +127,14 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
+int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
{
- struct pglist_data *pgdata;
- struct zone *zone;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int rc;
resize_hpt_for_hotplug(memblock_phys_mem_size());
- pgdata = NODE_DATA(nid);
-
start = (unsigned long)__va(start);
rc = create_section_mapping(start, start + size);
if (rc) {
@@ -147,11 +144,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
return -EFAULT;
}
- /* this should work for most non-highmem platforms */
- zone = pgdata->node_zones +
- zone_for_memory(nid, start, size, 0, for_device);
-
- return __add_pages(nid, zone, start_pfn, nr_pages);
+ return __add_pages(nid, start_pfn, nr_pages, want_memblock);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -159,11 +152,20 @@ int arch_remove_memory(u64 start, u64 size)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
+ struct vmem_altmap *altmap;
+ struct page *page;
int ret;
- zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
+ /*
+ * If we have an altmap then we need to skip over any reserved PFNs
+ * when querying the zone.
+ */
+ page = pfn_to_page(start_pfn);
+ altmap = to_vmem_altmap((unsigned long) page);
+ if (altmap)
+ page += vmem_altmap_offset(altmap);
+
+ ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
if (ret)
return ret;
@@ -313,11 +315,11 @@ void __init paging_init(void)
unsigned long end = __fix_to_virt(FIX_HOLE);
for (; v < end; v += PAGE_SIZE)
- map_page(v, 0, 0); /* XXX gross */
+ map_kernel_page(v, 0, 0); /* XXX gross */
#endif
#ifdef CONFIG_HIGHMEM
- map_page(PKMAP_BASE, 0, 0); /* XXX gross */
+ map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 9dbd2a733d6b..0ee6be4f1ba4 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index c6dca2ae78ef..71de2c6d88f3 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm)
* mm->context.addr_limit. Default to max task size so that we copy the
* default values to paca which will help us to handle slb miss early.
*/
- mm->context.addr_limit = TASK_SIZE_128TB;
+ mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
/*
* The old code would re-promote on fork, we don't do that when using
@@ -235,10 +235,15 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
- asm volatile("isync": : :"memory");
- mtspr(SPRN_PID, next->context.id);
- asm volatile("isync \n"
- PPC_SLBIA(0x7)
- : : :"memory");
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+ isync();
+ mtspr(SPRN_PID, next->context.id);
+ isync();
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
+ } else {
+ mtspr(SPRN_PID, next->context.id);
+ isync();
+ }
}
#endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index f988db655e5b..d46128b22150 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -94,7 +94,6 @@ extern void _tlbia(void);
#ifdef CONFIG_PPC32
extern void mapin_ram(void);
-extern int map_page(unsigned long va, phys_addr_t pa, int flags);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
unsigned int size, pgprot_t prot);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 371792e4418f..b95c584ce19d 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1311,8 +1311,10 @@ static int update_lookup_table(void *data)
/*
* Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise.
+ *
+ * cpus_locked says whether we already hold cpu_hotplug_lock.
*/
-int arch_update_cpu_topology(void)
+int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
@@ -1400,15 +1402,23 @@ int arch_update_cpu_topology(void)
if (!cpumask_weight(&updated_cpus))
goto out;
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_cpu_topology, &updates[0],
+ &updated_cpus);
+ else
+ stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
/*
* Update the numa-cpu lookup table with the new mappings, even for
* offline CPUs. It is best to perform this update from the stop-
* machine context.
*/
- stop_machine(update_lookup_table, &updates[0],
+ if (cpus_locked)
+ stop_machine_cpuslocked(update_lookup_table, &updates[0],
cpumask_of(raw_smp_processor_id()));
+ else
+ stop_machine(update_lookup_table, &updates[0],
+ cpumask_of(raw_smp_processor_id()));
for (ud = &updates[0]; ud; ud = ud->next) {
unregister_cpu_under_node(ud->cpu, ud->old_nid);
@@ -1426,6 +1436,12 @@ out:
return changed;
}
+int arch_update_cpu_topology(void)
+{
+ lockdep_assert_cpus_held();
+ return numa_update_cpu_topology(true);
+}
+
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 5fcb3dd74c13..31eed8fa8e99 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -32,7 +32,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
{
int changed;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
+ WARN_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif
changed = !pmd_same(*(pmdp), entry);
@@ -59,7 +59,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
#ifdef CONFIG_DEBUG_VM
WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(&mm->page_table_lock);
- WARN_ON(!pmd_trans_huge(pmd));
+ WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index 8b85a14b08ea..188b4107584d 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -11,8 +11,12 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
+#include <linux/mm.h>
#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/mmu.h>
#include <asm/tlb.h>
#include "mmu_decl.h"
@@ -22,6 +26,81 @@
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
+ * vmemmap is the starting address of the virtual address space where
+ * struct pages are allocated for all possible PFNs present on the system
+ * including holes and bad memory (hence sparse). These virtual struct
+ * pages are stored in sequence in this virtual address space irrespective
+ * of the fact whether the corresponding PFN is valid or not. This achieves
+ * constant relationship between address of struct page and its PFN.
+ *
+ * During boot or memory hotplug operation when a new memory section is
+ * added, physical memory allocation (including hash table bolting) will
+ * be performed for the set of struct pages which are part of the memory
+ * section. This saves memory by not allocating struct pages for PFNs
+ * which are not valid.
+ *
+ * ----------------------------------------------
+ * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES|
+ * ----------------------------------------------
+ *
+ * f000000000000000 c000000000000000
+ * vmemmap +--------------+ +--------------+
+ * + | page struct | +--------------> | page struct |
+ * | +--------------+ +--------------+
+ * | | page struct | +--------------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | + +------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | | +--> | page struct |
+ * | +--------------+ | | +--------------+
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | +-------+ |
+ * | +--------------+ |
+ * | | page struct | +-----------+
+ * | +--------------+
+ * | | page struct | No mapping
+ * | +--------------+
+ * | | page struct | No mapping
+ * v +--------------+
+ *
+ * -----------------------------------------
+ * | RELATION BETWEEN STRUCT PAGES AND PFNS|
+ * -----------------------------------------
+ *
+ * vmemmap +--------------+ +---------------+
+ * + | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * v +--------------+ +---------------+
+ */
+/*
* On hash-based CPUs, the vmemmap is bolted in the hash table.
*
*/
@@ -109,7 +188,7 @@ unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr
unsigned long old;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
+ WARN_ON(!hash__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
@@ -141,6 +220,7 @@ pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addres
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_devmap(*pmdp));
pmd = *pmdp;
pmd_clear(pmdp);
@@ -221,6 +301,7 @@ void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
+ VM_BUG_ON(pmd_devmap(*pmdp));
/*
* We can't mark the pmd none here, because that will cause a race
@@ -342,3 +423,35 @@ int hash__has_transparent_hugepage(void)
return 1;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void hash__mark_rodata_ro(void)
+{
+ unsigned long start = (unsigned long)_stext;
+ unsigned long end = (unsigned long)__init_begin;
+ unsigned long idx;
+ unsigned int step, shift;
+ unsigned long newpp = PP_RXXX;
+
+ shift = mmu_psize_defs[mmu_linear_psize].shift;
+ step = 1 << shift;
+
+ start = ((start + step - 1) >> shift) << shift;
+ end = (end >> shift) << shift;
+
+ pr_devel("marking ro start %lx, end %lx, step %x\n",
+ start, end, step);
+
+ if (start == end) {
+ pr_warn("could not set rodata ro, relocate the start"
+ " of the kernel to a 0x%x boundary\n", step);
+ return;
+ }
+
+ for (idx = start; idx < end; idx += step)
+ /* Not sure if we can do much with the return value */
+ mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
+ mmu_kernel_ssize);
+
+}
+#endif
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index c28165d8970b..8c13e4282308 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -11,6 +11,7 @@
#include <linux/sched/mm.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
+#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -19,6 +20,8 @@
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/powernv.h>
+#include <asm/sections.h>
+#include <asm/trace.h>
#include <trace/events/thp.h>
@@ -108,6 +111,49 @@ set_the_pte:
return 0;
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void radix__mark_rodata_ro(void)
+{
+ unsigned long start = (unsigned long)_stext;
+ unsigned long end = (unsigned long)__init_begin;
+ unsigned long idx;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = PAGE_ALIGN(end); // aligns up
+
+ pr_devel("marking ro start %lx, end %lx\n", start, end);
+
+ for (idx = start; idx < end; idx += PAGE_SIZE) {
+ pgdp = pgd_offset_k(idx);
+ pudp = pud_alloc(&init_mm, pgdp, idx);
+ if (!pudp)
+ continue;
+ if (pud_huge(*pudp)) {
+ ptep = (pte_t *)pudp;
+ goto update_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, idx);
+ if (!pmdp)
+ continue;
+ if (pmd_huge(*pmdp)) {
+ ptep = pmdp_ptep(pmdp);
+ goto update_the_pte;
+ }
+ ptep = pte_alloc_kernel(pmdp, idx);
+ if (!ptep)
+ continue;
+update_the_pte:
+ radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0);
+ }
+
+ radix__flush_tlb_kernel_range(start, end);
+}
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
static inline void __meminit print_mapping(unsigned long start,
unsigned long end,
unsigned long size)
@@ -121,7 +167,14 @@ static inline void __meminit print_mapping(unsigned long start,
static int __meminit create_physical_mapping(unsigned long start,
unsigned long end)
{
- unsigned long addr, mapping_size = 0;
+ unsigned long vaddr, addr, mapping_size = 0;
+ pgprot_t prot;
+ unsigned long max_mapping_size;
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ int split_text_mapping = 1;
+#else
+ int split_text_mapping = 0;
+#endif
start = _ALIGN_UP(start, PAGE_SIZE);
for (addr = start; addr < end; addr += mapping_size) {
@@ -130,9 +183,12 @@ static int __meminit create_physical_mapping(unsigned long start,
gap = end - addr;
previous_size = mapping_size;
+ max_mapping_size = PUD_SIZE;
+retry:
if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
- mmu_psize_defs[MMU_PAGE_1G].shift)
+ mmu_psize_defs[MMU_PAGE_1G].shift &&
+ PUD_SIZE <= max_mapping_size)
mapping_size = PUD_SIZE;
else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
mmu_psize_defs[MMU_PAGE_2M].shift)
@@ -140,13 +196,32 @@ static int __meminit create_physical_mapping(unsigned long start,
else
mapping_size = PAGE_SIZE;
+ if (split_text_mapping && (mapping_size == PUD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext)) {
+ max_mapping_size = PMD_SIZE;
+ goto retry;
+ }
+
+ if (split_text_mapping && (mapping_size == PMD_SIZE) &&
+ (addr <= __pa_symbol(__init_begin)) &&
+ (addr + mapping_size) >= __pa_symbol(_stext))
+ mapping_size = PAGE_SIZE;
+
if (mapping_size != previous_size) {
print_mapping(start, addr, previous_size);
start = addr;
}
- rc = radix__map_kernel_page((unsigned long)__va(addr), addr,
- PAGE_KERNEL_X, mapping_size);
+ vaddr = (unsigned long)__va(addr);
+
+ if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
+ overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size))
+ prot = PAGE_KERNEL_X;
+ else
+ prot = PAGE_KERNEL;
+
+ rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
if (rc)
return rc;
}
@@ -190,6 +265,7 @@ static void __init radix_init_pgtable(void)
asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (0));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
}
static void __init radix_init_partition_table(void)
@@ -316,6 +392,9 @@ static void update_hid_for_radix(void)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ trace_tlbie(0, 0, rb, 0, 2, 0, 1);
+ trace_tlbie(0, 0, rb, 0, 2, 1, 1);
+
/*
* now switch the HID
*/
@@ -683,7 +762,7 @@ unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long add
unsigned long old;
#ifdef CONFIG_DEBUG_VM
- WARN_ON(!radix__pmd_trans_huge(*pmdp));
+ WARN_ON(!radix__pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
@@ -701,6 +780,7 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+ VM_BUG_ON(pmd_devmap(*pmdp));
/*
* khugepaged calls this for normal pmd
*/
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index a65c0b4c0669..a9e4bfc025bc 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -60,7 +60,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;
- gfp_t flags = GFP_KERNEL | __GFP_ZERO;
+ gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT;
ptepage = alloc_pages(flags, 0);
if (!ptepage)
@@ -189,7 +189,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
err = 0;
for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_page(v+i, p+i, flags);
+ err = map_kernel_page(v+i, p+i, flags);
if (err) {
if (slab_is_available())
vunmap((void *)v);
@@ -215,7 +215,7 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int map_page(unsigned long va, phys_addr_t pa, int flags)
+int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
{
pmd_t *pd;
pte_t *pg;
@@ -255,7 +255,7 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
ktext = ((char *)v >= _stext && (char *)v < etext) ||
((char *)v >= _sinittext && (char *)v < _einittext);
f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL);
- map_page(v, p, f);
+ map_kernel_page(v, p, f);
#ifdef CONFIG_PPC_STD_MMU_32
if (ktext)
hash_preload(&init_mm, v, 0, 0x300);
@@ -387,11 +387,6 @@ void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
return;
}
- map_page(address, phys, pgprot_val(flags));
+ map_kernel_page(address, phys, pgprot_val(flags));
fixmaps++;
}
-
-void __this_fixmap_does_not_exist(void)
-{
- WARN_ON(1);
-}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index db93cf747a03..5c0b795d656c 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -47,6 +47,7 @@
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
+#include <asm/trace.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
@@ -323,7 +324,7 @@ struct page *pud_page(pud_t pud)
*/
struct page *pmd_page(pmd_t pmd)
{
- if (pmd_trans_huge(pmd) || pmd_huge(pmd))
+ if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
return pte_page(pmd_pte(pmd));
return virt_to_page(pmd_page_vaddr(pmd));
}
@@ -351,12 +352,20 @@ static pte_t *get_from_cache(struct mm_struct *mm)
static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
- struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
- if (!page)
- return NULL;
- if (!kernel && !pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
+ struct page *page;
+
+ if (!kernel) {
+ page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ } else {
+ page = alloc_page(PGALLOC_GFP);
+ if (!page)
+ return NULL;
}
ret = page_address(page);
@@ -469,13 +478,31 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
* use of this partition ID was, not the new use.
*/
asm volatile("ptesync" : : : "memory");
- if (old & PATB_HR)
+ if (old & PATB_HR) {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
- else
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
+ } else {
asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+ }
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}
EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
+{
+ if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) {
+ pr_warn("Warning: Unable to mark rodata read only on this CPU.\n");
+ return;
+ }
+
+ if (radix_enabled())
+ radix__mark_rodata_ro();
+ else
+ hash__mark_rodata_ro();
+}
+#endif
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 654a0d7ba0e7..13cfe413b40d 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -33,15 +33,7 @@ enum slb_index {
KSTACK_INDEX = 2, /* Kernel stack map */
};
-extern void slb_allocate_realmode(unsigned long ea);
-
-static void slb_allocate(unsigned long ea)
-{
- /* Currently, we do real mode for all SLBs including user, but
- * that will change if we bring back dynamic VSIDs
- */
- slb_allocate_realmode(ea);
-}
+extern void slb_allocate(unsigned long ea);
#define slb_esid_mask(ssize) \
(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 1519617aab36..bde378559d01 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -65,14 +65,15 @@ MMU_FTR_SECTION_ELSE \
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
-/* void slb_allocate_realmode(unsigned long ea);
+/* void slb_allocate(unsigned long ea);
*
* Create an SLB entry for the given EA (user or kernel).
* r3 = faulting address, r13 = PACA
* r9, r10, r11 are clobbered by this function
+ * r3 is preserved.
* No other registers are examined or changed.
*/
-_GLOBAL(slb_allocate_realmode)
+_GLOBAL(slb_allocate)
/*
* check for bad kernel/user address
* (ea & ~REGION_MASK) >= PGTABLE_RANGE
@@ -235,6 +236,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
* dont have any LRU information to help us choose a slot.
*/
+ mr r9,r3
+
+ /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
7: ld r10,PACASTABRR(r13)
addi r10,r10,1
/* This gets soft patched on boot. */
@@ -249,10 +253,10 @@ slb_compare_rr_to_size:
std r10,PACASTABRR(r13)
3:
- rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
- oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
+ rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */
+ oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */
- /* r3 = ESID data, r11 = VSID data */
+ /* r9 = ESID data, r11 = VSID data */
/*
* No need for an isync before or after this slbmte. The exception
@@ -265,21 +269,21 @@ slb_compare_rr_to_size:
bgelr cr7
/* Update the slb cache */
- lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
- cmpldi r3,SLB_CACHE_ENTRIES
+ lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
+ cmpldi r9,SLB_CACHE_ENTRIES
bge 1f
/* still room in the slb cache */
- sldi r11,r3,2 /* r11 = offset * sizeof(u32) */
+ sldi r11,r9,2 /* r11 = offset * sizeof(u32) */
srdi r10,r10,28 /* get the 36 bits of the ESID */
add r11,r11,r13 /* r11 = (u32 *)paca + offset */
stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
- addi r3,r3,1 /* offset++ */
+ addi r9,r9,1 /* offset++ */
b 2f
1: /* offset >= SLB_CACHE_ENTRIES */
- li r3,SLB_CACHE_ENTRIES+1
+ li r9,SLB_CACHE_ENTRIES+1
2:
- sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
crclr 4*cr0+eq /* set result to "success" */
blr
@@ -301,11 +305,11 @@ slb_compare_rr_to_size:
rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
/* r3 = EA, r11 = VSID data */
- clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */
+ clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */
b 7b
-_ASM_NOKPROBE_SYMBOL(slb_allocate_realmode)
+_ASM_NOKPROBE_SYMBOL(slb_allocate)
_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 966b9fccfa66..45f6740dd407 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -99,7 +99,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
if ((mm->task_size - len) < addr)
return 0;
vma = find_vma(mm, addr);
- return (!vma || (addr + len) <= vma->vm_start);
+ return (!vma || (addr + len) <= vm_start_gap(vma));
}
static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 02e71402fdd3..744e0164ecf5 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -16,6 +16,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/trace.h>
#define RIC_FLUSH_TLB 0
@@ -35,6 +36,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
/*
@@ -87,6 +89,7 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
@@ -104,6 +107,7 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -121,6 +125,7 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
}
/*
@@ -377,6 +382,7 @@ void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid_va);
@@ -394,6 +400,7 @@ void radix__flush_tlb_lpid(unsigned long lpid)
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}
EXPORT_SYMBOL(radix__flush_tlb_lpid);
@@ -420,12 +427,14 @@ void radix__flush_tlb_all(void)
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
/*
* now flush host entires by passing PRS = 0 and LPID == 0
*/
asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ trace_tlbie(0, 0, rb, 0, ric, prs, r);
}
void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 4517aa43a8b1..b5b0fb97b9c0 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -93,12 +93,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
/*
* Check if we have an active batch on this CPU. If not, just
- * flush now and return. For now, we don global invalidates
- * in that case, might be worth testing the mm cpu mask though
- * and decide to use local invalidates instead...
+ * flush now and return.
*/
if (!batch->active) {
- flush_hash_page(vpn, rpte, psize, ssize, 0);
+ flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
put_cpu_var(ppc64_tlb_batch);
return;
}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index aee2bb817ac6..861c5af1c9c4 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -938,7 +938,7 @@ common_load:
/*
* Tail call
*/
- case BPF_JMP | BPF_CALL | BPF_X:
+ case BPF_JMP | BPF_TAIL_CALL:
ctx->seen |= SEEN_TAILCALL;
bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
break;
@@ -1052,6 +1052,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->bpf_func = (void *)image;
fp->jited = 1;
+ fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 7b2ca16b1eb4..9c88b82f6229 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
@@ -27,6 +28,12 @@
#include "hv-24x7-catalog.h"
#include "hv-common.h"
+/* Version of the 24x7 hypervisor API that we should use in this machine. */
+static int interface_version;
+
+/* Whether we have to aggregate result data for some domains. */
+static bool aggregate_result_elements;
+
static bool domain_is_valid(unsigned domain)
{
switch (domain) {
@@ -54,6 +61,15 @@ static bool is_physical_domain(unsigned domain)
}
}
+/* Domains for which more than one result element are returned for each event. */
+static bool domain_needs_aggregation(unsigned int domain)
+{
+ return aggregate_result_elements &&
+ (domain == HV_PERF_DOMAIN_PHYS_CORE ||
+ (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
+ domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
+}
+
static const char *domain_name(unsigned domain)
{
if (!domain_is_valid(domain))
@@ -74,7 +90,11 @@ static const char *domain_name(unsigned domain)
static bool catalog_entry_domain_is_valid(unsigned domain)
{
- return is_physical_domain(domain);
+ /* POWER8 doesn't support virtual domains. */
+ if (interface_version == 1)
+ return is_physical_domain(domain);
+ else
+ return domain_is_valid(domain);
}
/*
@@ -166,6 +186,12 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static unsigned int max_num_requests(int interface_version)
+{
+ return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
+ / H24x7_REQUEST_SIZE(interface_version);
+}
+
static char *event_name(struct hv_24x7_event_data *ev, int *len)
{
*len = be16_to_cpu(ev->event_name_len) - 2;
@@ -260,9 +286,8 @@ static void *event_end(struct hv_24x7_event_data *ev, void *end)
return start + nl + dl + ldl;
}
-static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
- unsigned long version,
- unsigned long index)
+static long h_get_24x7_catalog_page_(unsigned long phys_4096,
+ unsigned long version, unsigned long index)
{
pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
phys_4096, version, index);
@@ -273,8 +298,7 @@ static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
phys_4096, version, index);
}
-static unsigned long h_get_24x7_catalog_page(char page[],
- u64 version, u32 index)
+static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
{
return h_get_24x7_catalog_page_(virt_to_phys(page),
version, index);
@@ -664,13 +688,13 @@ static int create_events_from_catalog(struct attribute ***events_,
struct attribute ***event_descs_,
struct attribute ***event_long_descs_)
{
- unsigned long hret;
+ long hret;
size_t catalog_len, catalog_page_len, event_entry_count,
event_data_len, event_data_offs,
event_data_bytes, junk_events, event_idx, event_attr_ct, i,
attr_max, event_idx_last, desc_ct, long_desc_ct;
ssize_t ct, ev_len;
- uint32_t catalog_version_num;
+ uint64_t catalog_version_num;
struct attribute **events, **event_descs, **event_long_descs;
struct hv_24x7_catalog_page_0 *page_0 =
kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
@@ -706,8 +730,8 @@ static int create_events_from_catalog(struct attribute ***events_,
event_data_offs = be16_to_cpu(page_0->event_data_offs);
event_data_len = be16_to_cpu(page_0->event_data_len);
- pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n",
- (size_t)catalog_version_num, catalog_len,
+ pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
+ catalog_version_num, catalog_len,
event_entry_count, event_data_offs, event_data_len);
if ((MAX_4K < event_data_len)
@@ -761,8 +785,8 @@ static int create_events_from_catalog(struct attribute ***events_,
catalog_version_num,
i + event_data_offs);
if (hret) {
- pr_err("failed to get event data in page %zu\n",
- i + event_data_offs);
+ pr_err("Failed to get event data in page %zu: rc=%ld\n",
+ i + event_data_offs, hret);
ret = -EIO;
goto e_event_data;
}
@@ -903,7 +927,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
{
- unsigned long hret;
+ long hret;
ssize_t ret = 0;
size_t catalog_len = 0, catalog_page_len = 0;
loff_t page_offset = 0;
@@ -988,7 +1012,7 @@ static ssize_t _name##_show(struct device *dev, \
struct device_attribute *dev_attr, \
char *buf) \
{ \
- unsigned long hret; \
+ long hret; \
ssize_t ret = 0; \
void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \
struct hv_24x7_catalog_page_0 *page_0 = page; \
@@ -1040,21 +1064,6 @@ static const struct attribute_group *attr_groups[] = {
NULL,
};
-static void log_24x7_hcall(struct hv_24x7_request_buffer *request_buffer,
- struct hv_24x7_data_result_buffer *result_buffer,
- unsigned long ret)
-{
- struct hv_24x7_request *req;
-
- req = &request_buffer->requests[0];
- pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => "
- "ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
- req->performance_domain, req->data_offset,
- req->starting_ix, req->starting_lpar_ix, ret, ret,
- result_buffer->detailed_rc,
- result_buffer->failing_request_ix);
-}
-
/*
* Start the process for a new H_GET_24x7_DATA hcall.
*/
@@ -1062,10 +1071,10 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
struct hv_24x7_data_result_buffer *result_buffer)
{
- memset(request_buffer, 0, 4096);
- memset(result_buffer, 0, 4096);
+ memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
+ memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
- request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
+ request_buffer->interface_version = interface_version;
/* memset above set request_buffer->num_requests to 0 */
}
@@ -1076,7 +1085,7 @@ static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
struct hv_24x7_data_result_buffer *result_buffer)
{
- unsigned long ret;
+ long ret;
/*
* NOTE: Due to variable number of array elements in request and
@@ -1087,10 +1096,19 @@ static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);
- if (ret)
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret) {
+ struct hv_24x7_request *req;
+
+ req = request_buffer->requests;
+ pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+ req->performance_domain, req->data_offset,
+ req->starting_ix, req->starting_lpar_ix,
+ ret, ret, result_buffer->detailed_rc,
+ result_buffer->failing_request_ix);
+ return -EIO;
+ }
- return ret;
+ return 0;
}
/*
@@ -1105,9 +1123,11 @@ static int add_event_to_24x7_request(struct perf_event *event,
{
u16 idx;
int i;
+ size_t req_size;
struct hv_24x7_request *req;
- if (request_buffer->num_requests > 254) {
+ if (request_buffer->num_requests >=
+ max_num_requests(request_buffer->interface_version)) {
pr_devel("Too many requests for 24x7 HCALL %d\n",
request_buffer->num_requests);
return -EINVAL;
@@ -1124,23 +1144,113 @@ static int add_event_to_24x7_request(struct perf_event *event,
idx = event_get_vcpu(event);
}
+ req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);
+
i = request_buffer->num_requests++;
- req = &request_buffer->requests[i];
+ req = (void *) request_buffer->requests + i * req_size;
req->performance_domain = event_get_domain(event);
req->data_size = cpu_to_be16(8);
req->data_offset = cpu_to_be32(event_get_offset(event));
- req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event)),
+ req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
req->max_num_lpars = cpu_to_be16(1);
req->starting_ix = cpu_to_be16(idx);
req->max_ix = cpu_to_be16(1);
+ if (request_buffer->interface_version > 1) {
+ if (domain_needs_aggregation(req->performance_domain))
+ req->max_num_thread_groups = -1;
+ else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+ req->starting_thread_group_ix = idx % 2;
+ req->max_num_thread_groups = 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * get_count_from_result - get event count from all result elements in result
+ *
+ * If the event corresponding to this result needs aggregation of the result
+ * element values, then this function does that.
+ *
+ * @event: Event associated with @res.
+ * @resb: Result buffer containing @res.
+ * @res: Result to work on.
+ * @countp: Output variable containing the event count.
+ * @next: Optional output variable pointing to the next result in @resb.
+ */
+static int get_count_from_result(struct perf_event *event,
+ struct hv_24x7_data_result_buffer *resb,
+ struct hv_24x7_result *res, u64 *countp,
+ struct hv_24x7_result **next)
+{
+ u16 num_elements = be16_to_cpu(res->num_elements_returned);
+ u16 data_size = be16_to_cpu(res->result_element_data_size);
+ unsigned int data_offset;
+ void *element_data;
+ int i;
+ u64 count;
+
+ /*
+ * We can bail out early if the result is empty.
+ */
+ if (!num_elements) {
+ pr_debug("Result of request %hhu is empty, nothing to do\n",
+ res->result_ix);
+
+ if (next)
+ *next = (struct hv_24x7_result *) res->elements;
+
+ return -ENODATA;
+ }
+
+ /*
+ * Since we always specify 1 as the maximum for the smallest resource
+ * we're requesting, there should to be only one element per result.
+ * Except when an event needs aggregation, in which case there are more.
+ */
+ if (num_elements != 1 &&
+ !domain_needs_aggregation(event_get_domain(event))) {
+ pr_err("Error: result of request %hhu has %hu elements\n",
+ res->result_ix, num_elements);
+
+ return -EIO;
+ }
+
+ if (data_size != sizeof(u64)) {
+ pr_debug("Error: result of request %hhu has data of %hu bytes\n",
+ res->result_ix, data_size);
+
+ return -ENOTSUPP;
+ }
+
+ if (resb->interface_version == 1)
+ data_offset = offsetof(struct hv_24x7_result_element_v1,
+ element_data);
+ else
+ data_offset = offsetof(struct hv_24x7_result_element_v2,
+ element_data);
+
+ /* Go through the result elements in the result. */
+ for (i = count = 0, element_data = res->elements + data_offset;
+ i < num_elements;
+ i++, element_data += data_size + data_offset)
+ count += be64_to_cpu(*((u64 *) element_data));
+
+ *countp = count;
+
+ /* The next result is after the last result element. */
+ if (next)
+ *next = element_data - data_offset;
+
return 0;
}
-static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
+static int single_24x7_request(struct perf_event *event, u64 *count)
{
- unsigned long ret;
+ int ret;
struct hv_24x7_request_buffer *request_buffer;
struct hv_24x7_data_result_buffer *result_buffer;
@@ -1157,13 +1267,12 @@ static unsigned long single_24x7_request(struct perf_event *event, u64 *count)
goto out;
ret = make_24x7_request(request_buffer, result_buffer);
- if (ret) {
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret)
goto out;
- }
/* process result from hcall */
- *count = be64_to_cpu(result_buffer->results[0].elements[0].element_data[0]);
+ ret = get_count_from_result(event, result_buffer,
+ result_buffer->results, count, NULL);
out:
put_cpu_var(hv_24x7_reqb);
@@ -1216,9 +1325,8 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
- /* Domains above 6 are invalid */
domain = event_get_domain(event);
- if (domain > 6) {
+ if (domain >= HV_PERF_DOMAIN_MAX) {
pr_devel("invalid domain %d\n", domain);
return -EINVAL;
}
@@ -1250,10 +1358,9 @@ static int h_24x7_event_init(struct perf_event *event)
static u64 h_24x7_get_value(struct perf_event *event)
{
- unsigned long ret;
u64 ct;
- ret = single_24x7_request(event, &ct);
- if (ret)
+
+ if (single_24x7_request(event, &ct))
/* We checked this in event init, shouldn't fail here... */
return 0;
@@ -1396,8 +1503,7 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
{
struct hv_24x7_request_buffer *request_buffer;
struct hv_24x7_data_result_buffer *result_buffer;
- struct hv_24x7_result *resb;
- struct perf_event *event;
+ struct hv_24x7_result *res, *next_res;
u64 count;
int i, ret, txn_flags;
struct hv_24x7_hw *h24x7hw;
@@ -1417,19 +1523,21 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
result_buffer = (void *)get_cpu_var(hv_24x7_resb);
ret = make_24x7_request(request_buffer, result_buffer);
- if (ret) {
- log_24x7_hcall(request_buffer, result_buffer, ret);
+ if (ret)
goto put_reqb;
- }
h24x7hw = &get_cpu_var(hv_24x7_hw);
- /* Update event counts from hcall */
- for (i = 0; i < request_buffer->num_requests; i++) {
- resb = &result_buffer->results[i];
- count = be64_to_cpu(resb->elements[0].element_data[0]);
- event = h24x7hw->events[i];
- h24x7hw->events[i] = NULL;
+ /* Go through results in the result buffer to update event counts. */
+ for (i = 0, res = result_buffer->results;
+ i < result_buffer->num_results; i++, res = next_res) {
+ struct perf_event *event = h24x7hw->events[res->result_ix];
+
+ ret = get_count_from_result(event, result_buffer, res, &count,
+ &next_res);
+ if (ret)
+ break;
+
update_event_count(event, count);
}
@@ -1480,6 +1588,18 @@ static int hv_24x7_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
pr_debug("not a virtualized system, not enabling\n");
return -ENODEV;
+ } else if (!cur_cpu_spec->oprofile_cpu_type)
+ return -ENODEV;
+
+ /* POWER8 only supports v1, while POWER9 only supports v2. */
+ if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
+ interface_version = 1;
+ else {
+ interface_version = 2;
+
+ /* SMT8 in POWER9 needs to aggregate result elements. */
+ if (threads_per_core == 8)
+ aggregate_result_elements = true;
}
hret = hv_perf_caps_get(&caps);
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 634ef4082cdc..5092c4a222a6 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -10,6 +10,8 @@ enum hv_perf_domains {
HV_PERF_DOMAIN_MAX,
};
+#define H24x7_REQUEST_SIZE(iface_version) (iface_version == 1 ? 16 : 32)
+
struct hv_24x7_request {
/* PHYSICAL domains require enabling via phyp/hmc. */
__u8 performance_domain;
@@ -42,19 +44,27 @@ struct hv_24x7_request {
/* chip, core, or virtual processor based on @performance_domain */
__be16 starting_ix;
__be16 max_ix;
+
+ /* The following fields were added in v2 of the 24x7 interface. */
+
+ __u8 starting_thread_group_ix;
+
+ /* -1 means all thread groups starting at @starting_thread_group_ix */
+ __u8 max_num_thread_groups;
+
+ __u8 reserved2[0xE];
} __packed;
struct hv_24x7_request_buffer {
/* 0 - ? */
/* 1 - ? */
-#define HV_24X7_IF_VERSION_CURRENT 0x01
__u8 interface_version;
__u8 num_requests;
__u8 reserved[0xE];
- struct hv_24x7_request requests[1];
+ struct hv_24x7_request requests[];
} __packed;
-struct hv_24x7_result_element {
+struct hv_24x7_result_element_v1 {
__be16 lpar_ix;
/*
@@ -67,10 +77,38 @@ struct hv_24x7_result_element {
__be32 lpar_cfg_instance_id;
/* size = @result_element_data_size of containing result. */
- __u64 element_data[1];
+ __u64 element_data[];
+} __packed;
+
+/*
+ * We need a separate struct for v2 because the offset of @element_data changed
+ * between versions.
+ */
+struct hv_24x7_result_element_v2 {
+ __be16 lpar_ix;
+
+ /*
+ * represents the core, chip, or virtual processor based on the
+ * request's @performance_domain
+ */
+ __be16 domain_ix;
+
+ /* -1 if @performance_domain does not refer to a virtual processor */
+ __be32 lpar_cfg_instance_id;
+
+ __u8 thread_group_ix;
+
+ __u8 reserved[7];
+
+ /* size = @result_element_data_size of containing result. */
+ __u64 element_data[];
} __packed;
struct hv_24x7_result {
+ /*
+ * The index of the 24x7 Request Structure in the 24x7 Request Buffer
+ * used to request this result.
+ */
__u8 result_ix;
/*
@@ -81,14 +119,25 @@ struct hv_24x7_result {
__u8 results_complete;
__be16 num_elements_returned;
- /* This is a copy of @data_size from the corresponding hv_24x7_request */
+ /*
+ * This is a copy of @data_size from the corresponding hv_24x7_request
+ *
+ * Warning: to obtain the size of each element in @elements you have
+ * to add the size of the other members of the result_element struct.
+ */
__be16 result_element_data_size;
__u8 reserved[0x2];
- /* WARNING: only valid for first result element due to variable sizes
- * of result elements */
- /* struct hv_24x7_result_element[@num_elements_returned] */
- struct hv_24x7_result_element elements[1];
+ /*
+ * Either
+ * struct hv_24x7_result_element_v1[@num_elements_returned]
+ * or
+ * struct hv_24x7_result_element_v2[@num_elements_returned]
+ *
+ * depending on the interface_version field of the
+ * struct hv_24x7_data_result_buffer containing this result.
+ */
+ char elements[];
} __packed;
struct hv_24x7_data_result_buffer {
@@ -104,7 +153,7 @@ struct hv_24x7_data_result_buffer {
__u8 reserved2[0x8];
/* WARNING: only valid for the first result due to variable sizes of
* results */
- struct hv_24x7_result results[1]; /* [@num_results] */
+ struct hv_24x7_result results[]; /* [@num_results] */
} __packed;
#endif
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index cbd82fde5770..09ceea6175ba 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -101,5 +101,6 @@ void perf_get_regs_user(struct perf_regs *regs_user,
struct pt_regs *regs_user_copy)
{
regs_user->regs = task_pt_regs(current);
- regs_user->abi = perf_reg_abi(current);
+ regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) :
+ PERF_SAMPLE_REGS_ABI_NONE;
}
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 71a6bfee5c02..80204e064362 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -16,7 +16,7 @@ EVENT(PM_CYC, 0x0001e)
EVENT(PM_ICT_NOSLOT_CYC, 0x100f8)
EVENT(PM_CMPLU_STALL, 0x1e054)
EVENT(PM_INST_CMPL, 0x00002)
-EVENT(PM_BRU_CMPL, 0x10012)
+EVENT(PM_BRU_CMPL, 0x4d05e)
EVENT(PM_BR_MPRED_CMPL, 0x400f6)
/* All L1 D cache load references counted at finish, gated by reject */
@@ -56,3 +56,5 @@ EVENT(PM_RUN_CYC, 0x600f4)
/* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
+/* Alternate Branch event code */
+EVENT(PM_BR_CMPL_ALT, 0x10012)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 018f8e90ac35..f17435e4a489 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -231,7 +231,7 @@ static int power9_generic_events_dd1[] = {
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
[PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_DISP,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL_ALT,
[PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
[PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
[PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
@@ -402,7 +402,7 @@ static struct power_pmu power9_isa207_pmu = {
.name = "POWER9",
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
- .test_adder = ISA207_TEST_ADDER,
+ .test_adder = P9_DD1_TEST_ADDER,
.compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power9_config_bhrb,
.bhrb_filter_map = power9_bhrb_filter_map,
@@ -421,7 +421,7 @@ static struct power_pmu power9_pmu = {
.name = "POWER9",
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
- .test_adder = P9_DD1_TEST_ADDER,
+ .test_adder = ISA207_TEST_ADDER,
.compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power9_config_bhrb,
.bhrb_filter_map = power9_bhrb_filter_map,
@@ -453,6 +453,12 @@ static int __init init_power9_pmu(void)
* sampling scenarios in power9 DD1, instead use PM_INST_DISP.
*/
EVENT_VAR(PM_INST_CMPL, _g).id = PM_INST_DISP;
+ /*
+ * Power9 DD1 should use PM_BR_CMPL_ALT event code for
+ * "branches" to provide correct counter value.
+ */
+ EVENT_VAR(PM_BRU_CMPL, _g).id = PM_BR_CMPL_ALT;
+ EVENT_VAR(PM_BRU_CMPL, _c).id = PM_BR_CMPL_ALT;
rc = register_power_pmu(&power9_isa207_pmu);
} else {
rc = register_power_pmu(&power9_pmu);
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 9b0afe935cc1..01cb109ebf17 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -199,6 +199,18 @@ config CURRITUCK
help
This option enables support for the IBM Currituck (476fpe) evaluation board
+config FSP2
+ bool "IBM FSP2 (476fpe) Support"
+ depends on PPC_47x
+ default n
+ select 476FPE
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select COMMON_CLK
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the IBM FSP2 (476fpe) board
+
config AKEBONO
bool "IBM Akebono (476gtr) Support"
depends on PPC_47x
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 26d35b5941f7..72b824160660 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_ISS4xx) += iss4xx.o
obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
obj-$(CONFIG_CURRITUCK) += ppc476.o
obj-$(CONFIG_AKEBONO) += ppc476.o
+obj-$(CONFIG_FSP2) += fsp2.o
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
new file mode 100644
index 000000000000..92e98048404f
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -0,0 +1,62 @@
+/*
+ * FSP-2 board specific routines
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+
+static __initdata struct of_device_id fsp2_of_bus[] = {
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,plb6", },
+ { .compatible = "ibm,opb", },
+ {},
+};
+
+static int __init fsp2_device_probe(void)
+{
+ of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
+ return 0;
+}
+machine_device_initcall(fsp2, fsp2_device_probe);
+
+static int __init fsp2_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
+ return 0;
+ return 1;
+}
+
+define_machine(fsp2) {
+ .name = "FSP-2",
+ .probe = fsp2_probe,
+ .progress = udbg_progress,
+ .init_IRQ = uic_init_tree,
+ .get_irq = uic_get_irq,
+ .restart = ppc4xx_reset_system,
+ .calibrate_decr = generic_calibrate_decr,
+};
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 33244e3d9375..4fd64d3f5c44 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -59,6 +59,17 @@ config PPC_OF_BOOT_TRAMPOLINE
In case of doubt, say Y
+config PPC_DT_CPU_FTRS
+ bool "Device-tree based CPU feature discovery & setup"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ This enables code to use a new device tree binding for describing CPU
+ compatibility and features. Saying Y here will attempt to use the new
+ binding if the firmware provides it. Currently only the skiboot
+ firmware provides this binding.
+ If you're not sure say Y.
+
config UDBG_RTAS_CONSOLE
bool "RTAS based debug console"
depends on PPC_RTAS
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 684e886eaae4..2f629e0551e9 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -344,12 +344,18 @@ config PPC_STD_MMU_64
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
+ select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
is only implemented by IBM Power9 CPUs, if you don't have one of them
you can probably disable this.
+config ARCH_ENABLE_HUGEPAGE_MIGRATION
+ def_bool y
+ depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
+
+
config PPC_MMU_NOHASH
def_bool y
depends on !PPC_STD_MMU
diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 71b995bbcae0..29d4f96ed33e 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -644,32 +644,22 @@ static void dma_fixed_unmap_sg(struct device *dev, struct scatterlist *sg,
direction, attrs);
}
-static int dma_fixed_dma_supported(struct device *dev, u64 mask)
-{
- return mask == DMA_BIT_MASK(64);
-}
-
-static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask);
+static int dma_suported_and_switch(struct device *dev, u64 dma_mask);
static const struct dma_map_ops dma_iommu_fixed_ops = {
.alloc = dma_fixed_alloc_coherent,
.free = dma_fixed_free_coherent,
.map_sg = dma_fixed_map_sg,
.unmap_sg = dma_fixed_unmap_sg,
- .dma_supported = dma_fixed_dma_supported,
- .set_dma_mask = dma_set_mask_and_switch,
+ .dma_supported = dma_suported_and_switch,
.map_page = dma_fixed_map_page,
.unmap_page = dma_fixed_unmap_page,
+ .mapping_error = dma_iommu_mapping_error,
};
-static void cell_dma_dev_setup_fixed(struct device *dev);
-
static void cell_dma_dev_setup(struct device *dev)
{
- /* Order is important here, these are not mutually exclusive */
- if (get_dma_ops(dev) == &dma_iommu_fixed_ops)
- cell_dma_dev_setup_fixed(dev);
- else if (get_pci_dma_ops() == &dma_iommu_ops)
+ if (get_pci_dma_ops() == &dma_iommu_ops)
set_iommu_table_base(dev, cell_get_iommu_table(dev));
else if (get_pci_dma_ops() == &dma_direct_ops)
set_dma_offset(dev, cell_dma_direct_offset);
@@ -956,38 +946,29 @@ out:
return dev_addr;
}
-static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
+static int dma_suported_and_switch(struct device *dev, u64 dma_mask)
{
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
-
if (dma_mask == DMA_BIT_MASK(64) &&
- cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
- {
+ cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) {
+ u64 addr = cell_iommu_get_fixed_address(dev) +
+ dma_iommu_fixed_base;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
set_dma_ops(dev, &dma_iommu_fixed_ops);
- } else {
+ set_dma_offset(dev, addr);
+ return 1;
+ }
+
+ if (dma_iommu_dma_supported(dev, dma_mask)) {
dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
set_dma_ops(dev, get_pci_dma_ops());
+ cell_dma_dev_setup(dev);
+ return 1;
}
- cell_dma_dev_setup(dev);
-
- *dev->dma_mask = dma_mask;
-
return 0;
}
-static void cell_dma_dev_setup_fixed(struct device *dev)
-{
- u64 addr;
-
- addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base;
- set_dma_offset(dev, addr);
-
- dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
-}
-
static void insert_16M_pte(unsigned long addr, unsigned long *ptab,
unsigned long base_pte)
{
@@ -1139,7 +1120,7 @@ static int __init cell_iommu_fixed_mapping_init(void)
cell_iommu_setup_window(iommu, np, dbase, dsize, 0);
}
- dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
+ dma_iommu_ops.dma_supported = dma_suported_and_switch;
set_pci_dma_ops(&dma_iommu_ops);
return 0;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index 895560f4be69..f84d52a2db40 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -115,7 +115,8 @@ static void smp_cell_setup_cpu(int cpu)
static int smp_cell_kick_cpu(int nr)
{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
if (!smp_startup_cpu(nr))
return -ENOENT;
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index e5a891ae80ee..84b7ac926ce6 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -175,6 +175,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
if (!dump_skip(cprm, skip))
goto Eio;
+
+ rc = 0;
out:
free_page((unsigned long)buf);
return rc;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index d12ea7b9fd47..3f48f6df1cf3 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -48,6 +48,7 @@ static int pnv_eeh_init(void)
{
struct pci_controller *hose;
struct pnv_phb *phb;
+ int max_diag_size = PNV_PCI_DIAG_BUF_SIZE;
if (!firmware_has_feature(FW_FEATURE_OPAL)) {
pr_warn("%s: OPAL is required !\n",
@@ -69,6 +70,9 @@ static int pnv_eeh_init(void)
if (phb->model == PNV_PHB_MODEL_P7IOC)
eeh_add_flag(EEH_ENABLE_IO_FOR_LOG);
+ if (phb->diag_data_size > max_diag_size)
+ max_diag_size = phb->diag_data_size;
+
/*
* PE#0 should be regarded as valid by EEH core
* if it's not the reserved one. Currently, we
@@ -82,6 +86,8 @@ static int pnv_eeh_init(void)
break;
}
+ eeh_set_pe_aux_size(max_diag_size);
+
return 0;
}
@@ -540,7 +546,7 @@ static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
s64 rc;
rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
- PNV_PCI_DIAG_BUF_SIZE);
+ phb->diag_data_size);
if (rc != OPAL_SUCCESS)
pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
__func__, rc, pe->phb->global_number);
@@ -1314,7 +1320,8 @@ static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
- struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
+ struct OpalIoP7IOCErrorData *data =
+ (struct OpalIoP7IOCErrorData*)phb->diag_data;
long rc;
rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
@@ -1549,10 +1556,10 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
/* Dump PHB diag-data */
rc = opal_pci_get_phb_diag_data2(phb->opal_id,
- phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+ phb->diag_data, phb->diag_data_size);
if (rc == OPAL_SUCCESS)
pnv_pci_dump_phb_diag_data(hose,
- phb->diag.blob);
+ phb->diag_data);
/* Try best to clear it */
opal_pci_eeh_freeze_clear(phb->opal_id,
@@ -1795,7 +1802,6 @@ static int __init eeh_powernv_init(void)
{
int ret = -EINVAL;
- eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
ret = eeh_ops_register(&pnv_eeh_ops);
if (!ret)
pr_info("EEH: PowerNV platform initialized\n");
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 445f30a2c5ef..2abee070373f 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -23,6 +23,7 @@
#include <asm/cpuidle.h>
#include <asm/code-patching.h>
#include <asm/smp.h>
+#include <asm/runlatch.h>
#include "powernv.h"
#include "subcore.h"
@@ -30,8 +31,33 @@
/* Power ISA 3.0 allows for stop states 0x0 - 0xF */
#define MAX_STOP_STATE 0xF
+#define P9_STOP_SPR_MSR 2000
+#define P9_STOP_SPR_PSSCR 855
+
static u32 supported_cpuidle_states;
+/*
+ * The default stop state that will be used by ppc_md.power_save
+ * function on platforms that support stop instruction.
+ */
+static u64 pnv_default_stop_val;
+static u64 pnv_default_stop_mask;
+static bool default_stop_found;
+
+/*
+ * First deep stop state. Used to figure out when to save/restore
+ * hypervisor context.
+ */
+u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
+
+/*
+ * psscr value and mask of the deepest stop idle state.
+ * Used when a cpu is offlined.
+ */
+static u64 pnv_deepest_stop_psscr_val;
+static u64 pnv_deepest_stop_psscr_mask;
+static bool deepest_stop_found;
+
static int pnv_save_sprs_for_deep_states(void)
{
int cpu;
@@ -48,6 +74,8 @@ static int pnv_save_sprs_for_deep_states(void)
uint64_t hid4_val = mfspr(SPRN_HID4);
uint64_t hid5_val = mfspr(SPRN_HID5);
uint64_t hmeer_val = mfspr(SPRN_HMEER);
+ uint64_t msr_val = MSR_IDLE;
+ uint64_t psscr_val = pnv_deepest_stop_psscr_val;
for_each_possible_cpu(cpu) {
uint64_t pir = get_hard_smp_processor_id(cpu);
@@ -61,6 +89,18 @@ static int pnv_save_sprs_for_deep_states(void)
if (rc != 0)
return rc;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rc = opal_slw_set_reg(pir, P9_STOP_SPR_MSR, msr_val);
+ if (rc)
+ return rc;
+
+ rc = opal_slw_set_reg(pir,
+ P9_STOP_SPR_PSSCR, psscr_val);
+
+ if (rc)
+ return rc;
+ }
+
/* HIDs are per core registers */
if (cpu_thread_in_core(cpu) == 0) {
@@ -72,17 +112,21 @@ static int pnv_save_sprs_for_deep_states(void)
if (rc != 0)
return rc;
- rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
- if (rc != 0)
- return rc;
+ /* Only p8 needs to set extra HID regiters */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
- rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
- if (rc != 0)
- return rc;
+ rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+ if (rc != 0)
+ return rc;
- rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
- if (rc != 0)
- return rc;
+ rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+ if (rc != 0)
+ return rc;
+ }
}
}
@@ -96,15 +140,24 @@ static void pnv_alloc_idle_core_states(void)
u32 *core_idle_state;
/*
- * core_idle_state - First 8 bits track the idle state of each thread
- * of the core. The 8th bit is the lock bit. Initially all thread bits
- * are set. They are cleared when the thread enters deep idle state
- * like sleep and winkle. Initially the lock bit is cleared.
- * The lock bit has 2 purposes
- * a. While the first thread is restoring core state, it prevents
- * other threads in the core from switching to process context.
- * b. While the last thread in the core is saving the core state, it
- * prevents a different thread from waking up.
+ * core_idle_state - The lower 8 bits track the idle state of
+ * each thread of the core.
+ *
+ * The most significant bit is the lock bit.
+ *
+ * Initially all the bits corresponding to threads_per_core
+ * are set. They are cleared when the thread enters deep idle
+ * state like sleep and winkle/stop.
+ *
+ * Initially the lock bit is cleared. The lock bit has 2
+ * purposes:
+ * a. While the first thread in the core waking up from
+ * idle is restoring core state, it prevents other
+ * threads in the core from switching to process
+ * context.
+ * b. While the last thread in the core is saving the
+ * core state, it prevents a different thread from
+ * waking up.
*/
for (i = 0; i < nr_cores; i++) {
int first_cpu = i * threads_per_core;
@@ -112,7 +165,7 @@ static void pnv_alloc_idle_core_states(void)
size_t paca_ptr_array_size;
core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
- *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+ *core_idle_state = (1 << threads_per_core) - 1;
paca_ptr_array_size = (threads_per_core *
sizeof(struct paca_struct *));
@@ -231,56 +284,104 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
show_fastsleep_workaround_applyonce,
store_fastsleep_workaround_applyonce);
-/*
- * The default stop state that will be used by ppc_md.power_save
- * function on platforms that support stop instruction.
- */
-static u64 pnv_default_stop_val;
-static u64 pnv_default_stop_mask;
-static bool default_stop_found;
+static unsigned long __power7_idle_type(unsigned long type)
+{
+ unsigned long srr1;
-/*
- * Used for ppc_md.power_save which needs a function with no parameters
- */
-static void power9_idle(void)
+ if (!prep_irq_for_idle_irqsoff())
+ return 0;
+
+ __ppc64_runlatch_off();
+ srr1 = power7_idle_insn(type);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
+ return srr1;
+}
+
+void power7_idle_type(unsigned long type)
+{
+ unsigned long srr1;
+
+ srr1 = __power7_idle_type(type);
+ irq_set_pending_from_srr1(srr1);
+}
+
+void power7_idle(void)
{
- power9_idle_stop(pnv_default_stop_val, pnv_default_stop_mask);
+ if (!powersave_nap)
+ return;
+
+ power7_idle_type(PNV_THREAD_NAP);
}
-/*
- * First deep stop state. Used to figure out when to save/restore
- * hypervisor context.
- */
-u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
+static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask)
+{
+ unsigned long psscr;
+ unsigned long srr1;
+
+ if (!prep_irq_for_idle_irqsoff())
+ return 0;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
+ return srr1;
+}
+
+void power9_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask)
+{
+ unsigned long srr1;
+
+ srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
+ irq_set_pending_from_srr1(srr1);
+}
/*
- * psscr value and mask of the deepest stop idle state.
- * Used when a cpu is offlined.
+ * Used for ppc_md.power_save which needs a function with no parameters
*/
-static u64 pnv_deepest_stop_psscr_val;
-static u64 pnv_deepest_stop_psscr_mask;
-static bool deepest_stop_found;
+void power9_idle(void)
+{
+ power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
+}
+#ifdef CONFIG_HOTPLUG_CPU
/*
* pnv_cpu_offline: A function that puts the CPU into the deepest
* available platform idle state on a CPU-Offline.
+ * interrupts hard disabled and no lazy irq pending.
*/
unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
-
u32 idle_states = pnv_get_supported_cpuidle_states();
+ __ppc64_runlatch_off();
+
if (cpu_has_feature(CPU_FTR_ARCH_300) && deepest_stop_found) {
- srr1 = power9_idle_stop(pnv_deepest_stop_psscr_val,
- pnv_deepest_stop_psscr_mask);
+ unsigned long psscr;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
+ pnv_deepest_stop_psscr_val;
+ srr1 = power9_idle_stop(psscr);
+
} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
- srr1 = power7_winkle();
+ srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
(idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_sleep();
+ srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
} else if (idle_states & OPAL_PM_NAP_ENABLED) {
- srr1 = power7_nap(1);
+ srr1 = power7_idle_insn(PNV_THREAD_NAP);
} else {
/* This is the fallback method. We emulate snooze */
while (!generic_check_cpu_restart(cpu)) {
@@ -291,8 +392,11 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
HMT_medium();
}
+ __ppc64_runlatch_on();
+
return srr1;
}
+#endif
/*
* Power ISA 3.0 idle initialization.
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 78fa9395b8c5..b5d960d6db3d 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -75,7 +75,8 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
if (WARN_ON(!gpdev))
return NULL;
- if (WARN_ON(!gpdev->dev.of_node))
+ /* Not all PCI devices have device-tree nodes */
+ if (!gpdev->dev.of_node)
return NULL;
/* Get assoicated PCI device */
@@ -448,7 +449,7 @@ static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
return mmio_atsd_reg;
}
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
+static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
{
unsigned long launch;
@@ -464,12 +465,15 @@ static int mmio_invalidate_pid(struct npu *npu, unsigned long pid)
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
/* Invalidating the entire process doesn't use a va */
return mmio_launch_invalidate(npu, launch, 0);
}
static int mmio_invalidate_va(struct npu *npu, unsigned long va,
- unsigned long pid)
+ unsigned long pid, bool flush)
{
unsigned long launch;
@@ -485,26 +489,60 @@ static int mmio_invalidate_va(struct npu *npu, unsigned long va,
/* PID */
launch |= pid << PPC_BITLSHIFT(38);
+ /* No flush */
+ launch |= !flush << PPC_BITLSHIFT(39);
+
return mmio_launch_invalidate(npu, launch, va);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
+struct mmio_atsd_reg {
+ struct npu *npu;
+ int reg;
+};
+
+static void mmio_invalidate_wait(
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+{
+ struct npu *npu;
+ int i, reg;
+
+ /* Wait for all invalidations to complete */
+ for (i = 0; i <= max_npu2_index; i++) {
+ if (mmio_atsd_reg[i].reg < 0)
+ continue;
+
+ /* Wait for completion */
+ npu = mmio_atsd_reg[i].npu;
+ reg = mmio_atsd_reg[i].reg;
+ while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
+ cpu_relax();
+
+ put_mmio_atsd_reg(npu, reg);
+
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have
+ * been flushed. We use PID 0 as it will never be used for a
+ * process on the GPU.
+ */
+ if (flush)
+ mmio_invalidate_pid(npu, 0, true);
+ }
+}
+
/*
* Invalidate either a single address or an entire PID depending on
* the value of va.
*/
static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address)
+ unsigned long address, bool flush)
{
- int i, j, reg;
+ int i, j;
struct npu *npu;
struct pnv_phb *nphb;
struct pci_dev *npdev;
- struct {
- struct npu *npu;
- int reg;
- } mmio_atsd_reg[NV_MAX_NPUS];
+ struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
/*
@@ -524,10 +562,11 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
if (va)
mmio_atsd_reg[i].reg =
- mmio_invalidate_va(npu, address, pid);
+ mmio_invalidate_va(npu, address, pid,
+ flush);
else
mmio_atsd_reg[i].reg =
- mmio_invalidate_pid(npu, pid);
+ mmio_invalidate_pid(npu, pid, flush);
/*
* The NPU hardware forwards the shootdown to all GPUs
@@ -543,18 +582,10 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
*/
flush_tlb_mm(npu_context->mm);
- /* Wait for all invalidations to complete */
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
-
- /* Wait for completion */
- npu = mmio_atsd_reg[i].npu;
- reg = mmio_atsd_reg[i].reg;
- while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
- cpu_relax();
- put_mmio_atsd_reg(npu, reg);
- }
+ mmio_invalidate_wait(mmio_atsd_reg, flush);
+ if (flush)
+ /* Wait for the flush to complete */
+ mmio_invalidate_wait(mmio_atsd_reg, false);
}
static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -570,7 +601,7 @@ static void pnv_npu2_mn_release(struct mmu_notifier *mn,
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
- mmio_invalidate(npu_context, 0, 0);
+ mmio_invalidate(npu_context, 0, 0, true);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
@@ -580,7 +611,7 @@ static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
@@ -589,7 +620,7 @@ static void pnv_npu2_mn_invalidate_page(struct mmu_notifier *mn,
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- mmio_invalidate(npu_context, 1, address);
+ mmio_invalidate(npu_context, 1, address, true);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
@@ -599,8 +630,11 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
struct npu_context *npu_context = mn_to_npu_context(mn);
unsigned long address;
- for (address = start; address <= end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address);
+ for (address = start; address < end; address += PAGE_SIZE)
+ mmio_invalidate(npu_context, 1, address, false);
+
+ /* Do the flush only on the final addess == end */
+ mmio_invalidate(npu_context, 1, address, true);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -650,8 +684,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
- if (!mm) {
- /* kernel thread contexts are not supported */
+ if (!mm || mm->context.id == 0) {
+ /*
+ * Kernel thread contexts are not supported and context id 0 is
+ * reserved on the GPU.
+ */
return ERR_PTR(-EINVAL);
}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index f620572f891f..4ca6c26a56d5 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -99,10 +99,10 @@ opal_return:
lwz r4,8(r1);
ld r5,PPC_LR_STKOFF(r1);
ld r6,PACASAVEDMSR(r13);
- mtspr SPRN_SRR0,r5;
- mtspr SPRN_SRR1,r6;
mtcr r4;
- rfid
+ mtspr SPRN_HSRR0,r5;
+ mtspr SPRN_HSRR1,r6;
+ hrfid
opal_real_call:
mfcr r11
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 283caf1070c9..437613588df1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1718,6 +1718,100 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
*/
}
+static bool pnv_pci_ioda_pe_single_vendor(struct pnv_ioda_pe *pe)
+{
+ unsigned short vendor = 0;
+ struct pci_dev *pdev;
+
+ if (pe->device_count == 1)
+ return true;
+
+ /* pe->pdev should be set if it's a single device, pe->pbus if not */
+ if (!pe->pbus)
+ return true;
+
+ list_for_each_entry(pdev, &pe->pbus->devices, bus_list) {
+ if (!vendor) {
+ vendor = pdev->vendor;
+ continue;
+ }
+
+ if (pdev->vendor != vendor)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Reconfigure TVE#0 to be usable as 64-bit DMA space.
+ *
+ * The first 4GB of virtual memory for a PE is reserved for 32-bit accesses.
+ * Devices can only access more than that if bit 59 of the PCI address is set
+ * by hardware, which indicates TVE#1 should be used instead of TVE#0.
+ * Many PCI devices are not capable of addressing that many bits, and as a
+ * result are limited to the 4GB of virtual memory made available to 32-bit
+ * devices in TVE#0.
+ *
+ * In order to work around this, reconfigure TVE#0 to be suitable for 64-bit
+ * devices by configuring the virtual memory past the first 4GB inaccessible
+ * by 64-bit DMAs. This should only be used by devices that want more than
+ * 4GB, and only on PEs that have no 32-bit devices.
+ *
+ * Currently this will only work on PHB3 (POWER8).
+ */
+static int pnv_pci_ioda_dma_64bit_bypass(struct pnv_ioda_pe *pe)
+{
+ u64 window_size, table_size, tce_count, addr;
+ struct page *table_pages;
+ u64 tce_order = 28; /* 256MB TCEs */
+ __be64 *tces;
+ s64 rc;
+
+ /*
+ * Window size needs to be a power of two, but needs to account for
+ * shifting memory by the 4GB offset required to skip 32bit space.
+ */
+ window_size = roundup_pow_of_two(memory_hotplug_max() + (1ULL << 32));
+ tce_count = window_size >> tce_order;
+ table_size = tce_count << 3;
+
+ if (table_size < PAGE_SIZE)
+ table_size = PAGE_SIZE;
+
+ table_pages = alloc_pages_node(pe->phb->hose->node, GFP_KERNEL,
+ get_order(table_size));
+ if (!table_pages)
+ goto err;
+
+ tces = page_address(table_pages);
+ if (!tces)
+ goto err;
+
+ memset(tces, 0, table_size);
+
+ for (addr = 0; addr < memory_hotplug_max(); addr += (1 << tce_order)) {
+ tces[(addr + (1ULL << 32)) >> tce_order] =
+ cpu_to_be64(addr | TCE_PCI_READ | TCE_PCI_WRITE);
+ }
+
+ rc = opal_pci_map_pe_dma_window(pe->phb->opal_id,
+ pe->pe_number,
+ /* reconfigure window 0 */
+ (pe->pe_number << 1) + 0,
+ 1,
+ __pa(tces),
+ table_size,
+ 1 << tce_order);
+ if (rc == OPAL_SUCCESS) {
+ pe_info(pe, "Using 64-bit DMA iommu bypass (through TVE#0)\n");
+ return 0;
+ }
+err:
+ pe_err(pe, "Error configuring 64-bit DMA bypass\n");
+ return -EIO;
+}
+
static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -1726,6 +1820,7 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
struct pnv_ioda_pe *pe;
uint64_t top;
bool bypass = false;
+ s64 rc;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
return -ENODEV;;
@@ -1740,8 +1835,27 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
dev_info(&pdev->dev, "Using 64-bit DMA iommu bypass\n");
set_dma_ops(&pdev->dev, &dma_direct_ops);
} else {
- dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
- set_dma_ops(&pdev->dev, &dma_iommu_ops);
+ /*
+ * If the device can't set the TCE bypass bit but still wants
+ * to access 4GB or more, on PHB3 we can reconfigure TVE#0 to
+ * bypass the 32-bit region and be usable for 64-bit DMAs.
+ * The device needs to be able to address all of this space.
+ */
+ if (dma_mask >> 32 &&
+ dma_mask > (memory_hotplug_max() + (1ULL << 32)) &&
+ pnv_pci_ioda_pe_single_vendor(pe) &&
+ phb->model == PNV_PHB_MODEL_PHB3) {
+ /* Configure the bypass mode */
+ rc = pnv_pci_ioda_dma_64bit_bypass(pe);
+ if (rc)
+ return rc;
+ /* 4GB offset bypasses 32-bit space */
+ set_dma_offset(&pdev->dev, (1ULL << 32));
+ set_dma_ops(&pdev->dev, &dma_direct_ops);
+ } else {
+ dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
+ set_dma_ops(&pdev->dev, &dma_iommu_ops);
+ }
}
*pdev->dev.dma_mask = dma_mask;
@@ -3123,13 +3237,13 @@ static int pnv_pci_diag_data_set(void *data, u64 val)
phb = hose->private_data;
/* Retrieve the diag data from firmware */
- ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
- PNV_PCI_DIAG_BUF_SIZE);
+ ret = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+ phb->diag_data_size);
if (ret != OPAL_SUCCESS)
return -EIO;
/* Print the diag data to the kernel log */
- pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
+ pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
return 0;
}
@@ -3725,6 +3839,15 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
+ /* Initialize diagnostic data buffer */
+ prop32 = of_get_property(np, "ibm,phb-diag-data-size", NULL);
+ if (prop32)
+ phb->diag_data_size = be32_to_cpup(prop32);
+ else
+ phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
+
+ phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0);
+
/* Parse 32-bit and IO ranges (if any) */
pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 935ccb249a8a..7905d179d036 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -227,11 +227,39 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev)
}
#endif /* CONFIG_PCI_MSI */
+/* Nicely print the contents of the PE State Tables (PEST). */
+static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
+{
+ __be64 prevA = ULONG_MAX, prevB = ULONG_MAX;
+ bool dup = false;
+ int i;
+
+ for (i = 0; i < pest_size; i++) {
+ __be64 peA = be64_to_cpu(pestA[i]);
+ __be64 peB = be64_to_cpu(pestB[i]);
+
+ if (peA != prevA || peB != prevB) {
+ if (dup) {
+ pr_info("PE[..%03x] A/B: as above\n", i-1);
+ dup = false;
+ }
+ prevA = peA;
+ prevB = peB;
+ if (peA & PNV_IODA_STOPPED_STATE ||
+ peB & PNV_IODA_STOPPED_STATE)
+ pr_info("PE[%03x] A/B: %016llx %016llx\n",
+ i, peA, peB);
+ } else if (!dup && (peA & PNV_IODA_STOPPED_STATE ||
+ peB & PNV_IODA_STOPPED_STATE)) {
+ dup = true;
+ }
+ }
+}
+
static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
struct OpalIoPhbErrorCommon *common)
{
struct OpalIoP7IOCPhbErrorData *data;
- int i;
data = (struct OpalIoP7IOCPhbErrorData *)common;
pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n",
@@ -308,22 +336,13 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose,
be64_to_cpu(data->dma1ErrorLog0),
be64_to_cpu(data->dma1ErrorLog1));
- for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
- if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
- (be64_to_cpu(data->pestB[i]) >> 63) == 0)
- continue;
-
- pr_info("PE[%3d] A/B: %016llx %016llx\n",
- i, be64_to_cpu(data->pestA[i]),
- be64_to_cpu(data->pestB[i]));
- }
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_P7IOC_NUM_PEST_REGS);
}
static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
struct OpalIoPhbErrorCommon *common)
{
struct OpalIoPhb3ErrorData *data;
- int i;
data = (struct OpalIoPhb3ErrorData*)common;
pr_info("PHB3 PHB#%x Diag-data (Version: %d)\n",
@@ -404,15 +423,109 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose,
be64_to_cpu(data->dma1ErrorLog0),
be64_to_cpu(data->dma1ErrorLog1));
- for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) {
- if ((be64_to_cpu(data->pestA[i]) >> 63) == 0 &&
- (be64_to_cpu(data->pestB[i]) >> 63) == 0)
- continue;
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB3_NUM_PEST_REGS);
+}
- pr_info("PE[%3d] A/B: %016llx %016llx\n",
- i, be64_to_cpu(data->pestA[i]),
- be64_to_cpu(data->pestB[i]));
- }
+static void pnv_pci_dump_phb4_diag_data(struct pci_controller *hose,
+ struct OpalIoPhbErrorCommon *common)
+{
+ struct OpalIoPhb4ErrorData *data;
+
+ data = (struct OpalIoPhb4ErrorData*)common;
+ pr_info("PHB4 PHB#%d Diag-data (Version: %d)\n",
+ hose->global_number, be32_to_cpu(common->version));
+ if (data->brdgCtl)
+ pr_info("brdgCtl: %08x\n",
+ be32_to_cpu(data->brdgCtl));
+ if (data->deviceStatus || data->slotStatus ||
+ data->linkStatus || data->devCmdStatus ||
+ data->devSecStatus)
+ pr_info("RootSts: %08x %08x %08x %08x %08x\n",
+ be32_to_cpu(data->deviceStatus),
+ be32_to_cpu(data->slotStatus),
+ be32_to_cpu(data->linkStatus),
+ be32_to_cpu(data->devCmdStatus),
+ be32_to_cpu(data->devSecStatus));
+ if (data->rootErrorStatus || data->uncorrErrorStatus ||
+ data->corrErrorStatus)
+ pr_info("RootErrSts: %08x %08x %08x\n",
+ be32_to_cpu(data->rootErrorStatus),
+ be32_to_cpu(data->uncorrErrorStatus),
+ be32_to_cpu(data->corrErrorStatus));
+ if (data->tlpHdr1 || data->tlpHdr2 ||
+ data->tlpHdr3 || data->tlpHdr4)
+ pr_info("RootErrLog: %08x %08x %08x %08x\n",
+ be32_to_cpu(data->tlpHdr1),
+ be32_to_cpu(data->tlpHdr2),
+ be32_to_cpu(data->tlpHdr3),
+ be32_to_cpu(data->tlpHdr4));
+ if (data->sourceId)
+ pr_info("sourceId: %08x\n", be32_to_cpu(data->sourceId));
+ if (data->nFir)
+ pr_info("nFir: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->nFir),
+ be64_to_cpu(data->nFirMask),
+ be64_to_cpu(data->nFirWOF));
+ if (data->phbPlssr || data->phbCsr)
+ pr_info("PhbSts: %016llx %016llx\n",
+ be64_to_cpu(data->phbPlssr),
+ be64_to_cpu(data->phbCsr));
+ if (data->lemFir)
+ pr_info("Lem: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->lemFir),
+ be64_to_cpu(data->lemErrorMask),
+ be64_to_cpu(data->lemWOF));
+ if (data->phbErrorStatus)
+ pr_info("PhbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbErrorStatus),
+ be64_to_cpu(data->phbFirstErrorStatus),
+ be64_to_cpu(data->phbErrorLog0),
+ be64_to_cpu(data->phbErrorLog1));
+ if (data->phbTxeErrorStatus)
+ pr_info("PhbTxeErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbTxeErrorStatus),
+ be64_to_cpu(data->phbTxeFirstErrorStatus),
+ be64_to_cpu(data->phbTxeErrorLog0),
+ be64_to_cpu(data->phbTxeErrorLog1));
+ if (data->phbRxeArbErrorStatus)
+ pr_info("RxeArbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRxeArbErrorStatus),
+ be64_to_cpu(data->phbRxeArbFirstErrorStatus),
+ be64_to_cpu(data->phbRxeArbErrorLog0),
+ be64_to_cpu(data->phbRxeArbErrorLog1));
+ if (data->phbRxeMrgErrorStatus)
+ pr_info("RxeMrgErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRxeMrgErrorStatus),
+ be64_to_cpu(data->phbRxeMrgFirstErrorStatus),
+ be64_to_cpu(data->phbRxeMrgErrorLog0),
+ be64_to_cpu(data->phbRxeMrgErrorLog1));
+ if (data->phbRxeTceErrorStatus)
+ pr_info("RxeTceErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRxeTceErrorStatus),
+ be64_to_cpu(data->phbRxeTceFirstErrorStatus),
+ be64_to_cpu(data->phbRxeTceErrorLog0),
+ be64_to_cpu(data->phbRxeTceErrorLog1));
+
+ if (data->phbPblErrorStatus)
+ pr_info("PblErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbPblErrorStatus),
+ be64_to_cpu(data->phbPblFirstErrorStatus),
+ be64_to_cpu(data->phbPblErrorLog0),
+ be64_to_cpu(data->phbPblErrorLog1));
+ if (data->phbPcieDlpErrorStatus)
+ pr_info("PcieDlp: %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbPcieDlpErrorLog1),
+ be64_to_cpu(data->phbPcieDlpErrorLog2),
+ be64_to_cpu(data->phbPcieDlpErrorStatus));
+ if (data->phbRegbErrorStatus)
+ pr_info("RegbErr: %016llx %016llx %016llx %016llx\n",
+ be64_to_cpu(data->phbRegbErrorStatus),
+ be64_to_cpu(data->phbRegbFirstErrorStatus),
+ be64_to_cpu(data->phbRegbErrorLog0),
+ be64_to_cpu(data->phbRegbErrorLog1));
+
+
+ pnv_pci_dump_pest(data->pestA, data->pestB, OPAL_PHB4_NUM_PEST_REGS);
}
void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
@@ -431,6 +544,9 @@ void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
case OPAL_PHB_ERROR_DATA_TYPE_PHB3:
pnv_pci_dump_phb3_diag_data(hose, common);
break;
+ case OPAL_PHB_ERROR_DATA_TYPE_PHB4:
+ pnv_pci_dump_phb4_diag_data(hose, common);
+ break;
default:
pr_warn("%s: Unrecognized ioType %d\n",
__func__, be32_to_cpu(common->ioType));
@@ -445,8 +561,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
spin_lock_irqsave(&phb->lock, flags);
/* Fetch PHB diag-data */
- rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
- PNV_PCI_DIAG_BUF_SIZE);
+ rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag_data,
+ phb->diag_data_size);
has_diag = (rc == OPAL_SUCCESS);
/* If PHB supports compound PE, to handle it */
@@ -474,7 +590,7 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
* with the normal errors generated when probing empty slots
*/
if (has_diag && ret)
- pnv_pci_dump_phb_diag_data(phb->hose, phb->diag.blob);
+ pnv_pci_dump_phb_diag_data(phb->hose, phb->diag_data);
spin_unlock_irqrestore(&phb->lock, flags);
}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 18c8a2fa03b8..f16bc403ec03 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -33,6 +33,9 @@ enum pnv_phb_model {
#define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */
#define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */
+/* Indicates operations are frozen for a PE: MMIO in PESTA & DMA in PESTB. */
+#define PNV_IODA_STOPPED_STATE 0x8000000000000000
+
/* Data associated with a PE, including IOMMU tracking etc.. */
struct pnv_phb;
struct pnv_ioda_pe {
@@ -169,13 +172,9 @@ struct pnv_phb {
unsigned int pe_rmap[0x10000];
} ioda;
- /* PHB and hub status structure */
- union {
- unsigned char blob[PNV_PCI_DIAG_BUF_SIZE];
- struct OpalIoP7IOCPhbErrorData p7ioc;
- struct OpalIoPhb3ErrorData phb3;
- struct OpalIoP7IOCErrorData hub_diag;
- } diag;
+ /* PHB and hub diagnostics */
+ unsigned int diag_data_size;
+ u8 *diag_data;
/* Nvlink2 data */
struct npu {
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 4aff754b6f2c..40dae96f7e20 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -63,7 +63,8 @@ static int pnv_smp_kick_cpu(int nr)
long rc;
uint8_t status;
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
/*
* If we already started or OPAL is not supported, we just
@@ -144,7 +145,14 @@ static void pnv_smp_cpu_kill_self(void)
unsigned long srr1, wmask;
/* Standard hot unplug procedure */
- local_irq_disable();
+ /*
+ * This hard disables local interurpts, ensuring we have no lazy
+ * irqs pending.
+ */
+ WARN_ON(irqs_disabled());
+ hard_irq_disable();
+ WARN_ON(lazy_irq_pending());
+
idle_task_exit();
current->active_mm = NULL; /* for sanity */
cpu = smp_processor_id();
@@ -162,16 +170,6 @@ static void pnv_smp_cpu_kill_self(void)
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
- /*
- * Hard-disable interrupts, and then clear irq_happened flags
- * that we can safely ignore while off-line, since they
- * are for things for which we do no processing when off-line
- * (or in the case of HMI, all the processing we need to do
- * is done in lower-level real-mode code).
- */
- hard_irq_disable();
- local_paca->irq_happened &= ~(PACA_IRQ_DEC | PACA_IRQ_HMI);
-
while (!generic_check_cpu_restart(cpu)) {
/*
* Clear IPI flag, since we don't handle IPIs while
@@ -182,9 +180,9 @@ static void pnv_smp_cpu_kill_self(void)
*/
kvmppc_set_host_ipi(cpu, 0);
- ppc64_runlatch_off();
srr1 = pnv_cpu_offline(cpu);
- ppc64_runlatch_on();
+
+ WARN_ON(lazy_irq_pending());
/*
* If the SRR1 value indicates that we woke up due to
@@ -198,8 +196,7 @@ static void pnv_smp_cpu_kill_self(void)
* contains 0.
*/
if (((srr1 & wmask) == SRR1_WAKEEE) ||
- ((srr1 & wmask) == SRR1_WAKEHVI) ||
- (local_paca->irq_happened & PACA_IRQ_EE)) {
+ ((srr1 & wmask) == SRR1_WAKEHVI)) {
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (xive_enabled())
xive_flush_interrupt();
@@ -211,14 +208,15 @@ static void pnv_smp_cpu_kill_self(void)
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
}
- local_paca->irq_happened &= ~(PACA_IRQ_EE | PACA_IRQ_DBELL);
smp_mb();
if (cpu_core_split_required())
continue;
if (srr1 && !generic_check_cpu_restart(cpu))
- DBG("CPU%d Unexpected exit while offline !\n", cpu);
+ DBG("CPU%d Unexpected exit while offline srr1=%lx!\n",
+ cpu, srr1);
+
}
/* Re-enable decrementer interrupts */
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 0babef11136f..596ae2e98040 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -18,6 +18,7 @@
#include <linux/stop_machine.h>
#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
@@ -182,7 +183,7 @@ static void unsplit_core(void)
cpu = smp_processor_id();
if (cpu_thread_in_core(cpu) != 0) {
while (mfspr(SPRN_HID0) & mask)
- power7_nap(0);
+ power7_idle_insn(PNV_THREAD_NAP);
per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
return;
@@ -348,7 +349,7 @@ static int set_subcores_per_core(int new_mode)
state->master = 0;
}
- get_online_cpus();
+ cpus_read_lock();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
@@ -356,9 +357,10 @@ static int set_subcores_per_core(int new_mode)
/* Ensure state is consistent before we call the other cpus */
mb();
- stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
+ stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
+ cpu_online_mask);
- put_online_cpus();
+ cpus_read_unlock();
return 0;
}
@@ -407,7 +409,13 @@ static DEVICE_ATTR(subcores_per_core, 0644,
static int subcore_init(void)
{
- if (!cpu_has_feature(CPU_FTR_SUBCORE))
+ unsigned pvr_ver;
+
+ pvr_ver = PVR_VER(mfspr(SPRN_PVR));
+
+ if (pvr_ver != PVR_POWER8 &&
+ pvr_ver != PVR_POWER8E &&
+ pvr_ver != PVR_POWER8NVL)
return 0;
/*
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 2d2e5f80a3d3..5cc35d6b94b6 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -471,11 +471,13 @@ static ssize_t modalias_show(struct device *_dev, struct device_attribute *a,
return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len;
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute ps3_system_bus_dev_attrs[] = {
- __ATTR_RO(modalias),
- __ATTR_NULL,
+static struct attribute *ps3_system_bus_dev_attrs[] = {
+ &dev_attr_modalias.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(ps3_system_bus_dev);
struct bus_type ps3_system_bus_type = {
.name = "ps3_system_bus",
@@ -484,7 +486,7 @@ struct bus_type ps3_system_bus_type = {
.probe = ps3_system_bus_probe,
.remove = ps3_system_bus_remove,
.shutdown = ps3_system_bus_shutdown,
- .dev_attrs = ps3_system_bus_dev_attrs,
+ .dev_groups = ps3_system_bus_dev_groups,
};
static int __init ps3_system_bus_init(void)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 913c54e23eea..3a6dfd14f64b 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -124,7 +124,7 @@ config HV_PERF_CTRS
Enable access to hypervisor supplied counters in perf. Currently,
this enables code that uses the hcall GetPerfCounterInfo and 24x7
interfaces to retrieve counters. GPCI exists on Power 6 and later
- systems. 24x7 is available on Power 8 systems.
+ systems. 24x7 is available on Power 8 and later systems.
If unsure, select Y.
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index bda18d8e1674..39187696ee74 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -588,7 +588,7 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
return sprintf(buf, "%s\n", "memory,cpu");
}
-static CLASS_ATTR(dlpar, S_IWUSR | S_IRUSR, dlpar_show, dlpar_store);
+static CLASS_ATTR_RW(dlpar);
static int __init pseries_dlpar_init(void)
{
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7bc0e91f8715..6afd1efd3633 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -554,7 +554,7 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
int rc;
- pr_debug("Attemping to remove CPU %s, drc index: %x\n",
+ pr_debug("Attempting to remove CPU %s, drc index: %x\n",
dn->name, drc_index);
rc = dlpar_offline_cpu(dn);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index e104c71ea44a..ca9b2f4aaa22 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -22,6 +22,7 @@
#include <asm/machdep.h>
#include <asm/prom.h>
#include <asm/sparsemem.h>
+#include <asm/fadump.h>
#include "pseries.h"
static bool rtas_hp_event;
@@ -124,6 +125,7 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
for (i = 0; i < num_lmbs; i++) {
lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
+ lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
}
@@ -147,6 +149,7 @@ static void dlpar_update_drconf_property(struct device_node *dn,
for (i = 0; i < num_lmbs; i++) {
lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
+ lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
}
@@ -406,6 +409,12 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb)
scns_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
phys_addr = lmb->base_addr;
+#ifdef CONFIG_FA_DUMP
+ /* Don't hot-remove memory that falls in fadump boot memory area */
+ if (is_fadump_boot_memory_area(phys_addr, block_sz))
+ return false;
+#endif
+
for (i = 0; i < scns_per_block; i++) {
pfn = PFN_DOWN(phys_addr);
if (!pfn_present(pfn))
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index b363e439ddb9..52146b1356d2 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -397,6 +397,7 @@ static ssize_t devspec_show(struct device *dev,
ofdev = to_platform_device(dev);
return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
}
+static DEVICE_ATTR_RO(devspec);
static ssize_t name_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -406,19 +407,22 @@ static ssize_t name_show(struct device *dev,
ofdev = to_platform_device(dev);
return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
}
+static DEVICE_ATTR_RO(name);
static ssize_t modalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return of_device_modalias(dev, buf, PAGE_SIZE);
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute ibmebus_bus_device_attrs[] = {
- __ATTR_RO(devspec),
- __ATTR_RO(name),
- __ATTR_RO(modalias),
- __ATTR_NULL
+static struct attribute *ibmebus_bus_device_attrs[] = {
+ &dev_attr_devspec.attr,
+ &dev_attr_name.attr,
+ &dev_attr_modalias.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(ibmebus_bus_device);
struct bus_type ibmebus_bus_type = {
.name = "ibmebus",
@@ -428,7 +432,7 @@ struct bus_type ibmebus_bus_type = {
.probe = ibmebus_bus_device_probe,
.remove = ibmebus_bus_device_remove,
.shutdown = ibmebus_bus_device_shutdown,
- .dev_attrs = ibmebus_bus_device_attrs,
+ .dev_groups = ibmebus_bus_device_groups,
};
EXPORT_SYMBOL(ibmebus_bus_type);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6541d0b03e4c..495ba4e7336d 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -301,7 +301,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
int ssize, unsigned long inv_flags)
{
unsigned long lpar_rc;
- unsigned long flags = (newpp & 7) | H_AVPN;
+ unsigned long flags;
unsigned long want_v;
want_v = hpte_encode_avpn(vpn, psize, ssize);
@@ -309,6 +309,11 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
want_v, slot, flags, psize);
+ flags = (newpp & 7) | H_AVPN;
+ if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+ /* Move pp0 into bit 8 (IBM 55) */
+ flags |= (newpp & HPTE_R_PP0) >> 55;
+
lpar_rc = plpar_pte_protect(flags, slot, want_v);
if (lpar_rc == H_NOT_FOUND) {
@@ -380,6 +385,10 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
BUG_ON(slot == -1);
flags = newpp & 7;
+ if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+ /* Move pp0 into bit 8 (IBM 55) */
+ flags |= (newpp & HPTE_R_PP0) >> 55;
+
lpar_rc = plpar_pte_protect(flags, slot, 0);
BUG_ON(lpar_rc != H_SUCCESS);
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 5a0c7ba429ce..2da4851eff99 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -349,8 +349,9 @@ void post_mobility_fixup(void)
return;
}
-static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
- const char *buf, size_t count)
+static ssize_t migration_store(struct class *class,
+ struct class_attribute *attr, const char *buf,
+ size_t count)
{
u64 streamid;
int rc;
@@ -380,7 +381,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
*/
#define MIGRATION_API_VERSION 1
-static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store);
+static CLASS_ATTR_WO(migration);
static CLASS_ATTR_STRING(api_version, S_IRUGO, __stringify(MIGRATION_API_VERSION));
static int __init mobility_sysfs_init(void)
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 52ca6b311d44..24785f63fb40 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -151,7 +151,8 @@ static void smp_setup_cpu(int cpu)
static int smp_pSeries_kick_cpu(int nr)
{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
if (!smp_startup_cpu(nr))
return -ENOENT;
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 28b09fd797ec..8a47f168476b 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -519,7 +519,7 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
{
struct vio_dev *viodev = to_vio_dev(dev);
struct iommu_table *tbl;
- dma_addr_t ret = DMA_ERROR_CODE;
+ dma_addr_t ret = IOMMU_MAPPING_ERROR;
tbl = get_iommu_table_base(dev);
if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
@@ -625,6 +625,7 @@ static const struct dma_map_ops vio_dma_mapping_ops = {
.unmap_page = vio_dma_iommu_unmap_page,
.dma_supported = vio_dma_iommu_dma_supported,
.get_required_mask = vio_dma_get_required_mask,
+ .mapping_error = dma_iommu_mapping_error,
};
/**
@@ -948,21 +949,21 @@ static void vio_cmo_bus_init(void)
/* sysfs device functions and data structures for CMO */
#define viodev_cmo_rd_attr(name) \
-static ssize_t viodev_cmo_##name##_show(struct device *dev, \
+static ssize_t cmo_##name##_show(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
}
-static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
+static ssize_t cmo_allocs_failed_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct vio_dev *viodev = to_vio_dev(dev);
return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
}
-static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
+static ssize_t cmo_allocs_failed_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct vio_dev *viodev = to_vio_dev(dev);
@@ -970,7 +971,7 @@ static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
return count;
}
-static ssize_t viodev_cmo_desired_set(struct device *dev,
+static ssize_t cmo_desired_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct vio_dev *viodev = to_vio_dev(dev);
@@ -993,27 +994,37 @@ static ssize_t name_show(struct device *, struct device_attribute *, char *);
static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf);
-static struct device_attribute vio_cmo_dev_attrs[] = {
- __ATTR_RO(name),
- __ATTR_RO(devspec),
- __ATTR_RO(modalias),
- __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
- viodev_cmo_desired_show, viodev_cmo_desired_set),
- __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
- __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
- __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
- viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
- __ATTR_NULL
+
+static struct device_attribute dev_attr_name;
+static struct device_attribute dev_attr_devspec;
+static struct device_attribute dev_attr_modalias;
+
+static DEVICE_ATTR_RO(cmo_entitled);
+static DEVICE_ATTR_RO(cmo_allocated);
+static DEVICE_ATTR_RW(cmo_desired);
+static DEVICE_ATTR_RW(cmo_allocs_failed);
+
+static struct attribute *vio_cmo_dev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_devspec.attr,
+ &dev_attr_modalias.attr,
+ &dev_attr_cmo_entitled.attr,
+ &dev_attr_cmo_allocated.attr,
+ &dev_attr_cmo_desired.attr,
+ &dev_attr_cmo_allocs_failed.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(vio_cmo_dev);
/* sysfs bus functions and data structures for CMO */
#define viobus_cmo_rd_attr(name) \
-static ssize_t cmo_##name##_show(struct bus_type *bt, char *buf) \
+static ssize_t cmo_bus_##name##_show(struct bus_type *bt, char *buf) \
{ \
return sprintf(buf, "%lu\n", vio_cmo.name); \
} \
-static BUS_ATTR_RO(cmo_##name)
+static struct bus_attribute bus_attr_cmo_bus_##name = \
+ __ATTR(cmo_##name, S_IRUGO, cmo_bus_##name##_show, NULL)
#define viobus_cmo_pool_rd_attr(name, var) \
static ssize_t \
@@ -1051,11 +1062,11 @@ static ssize_t cmo_high_store(struct bus_type *bt, const char *buf,
static BUS_ATTR_RW(cmo_high);
static struct attribute *vio_bus_attrs[] = {
- &bus_attr_cmo_entitled.attr,
- &bus_attr_cmo_spare.attr,
- &bus_attr_cmo_min.attr,
- &bus_attr_cmo_desired.attr,
- &bus_attr_cmo_curr.attr,
+ &bus_attr_cmo_bus_entitled.attr,
+ &bus_attr_cmo_bus_spare.attr,
+ &bus_attr_cmo_bus_min.attr,
+ &bus_attr_cmo_bus_desired.attr,
+ &bus_attr_cmo_bus_curr.attr,
&bus_attr_cmo_high.attr,
&bus_attr_cmo_reserve_size.attr,
&bus_attr_cmo_excess_size.attr,
@@ -1066,7 +1077,7 @@ ATTRIBUTE_GROUPS(vio_bus);
static void vio_cmo_sysfs_init(void)
{
- vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
+ vio_bus_type.dev_groups = vio_cmo_dev_groups;
vio_bus_type.bus_groups = vio_bus_groups;
}
#else /* CONFIG_PPC_SMLPAR */
@@ -1537,6 +1548,7 @@ static ssize_t name_show(struct device *dev,
{
return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
}
+static DEVICE_ATTR_RO(name);
static ssize_t devspec_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -1545,6 +1557,7 @@ static ssize_t devspec_show(struct device *dev,
return sprintf(buf, "%s\n", of_node_full_name(of_node));
}
+static DEVICE_ATTR_RO(devspec);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -1566,13 +1579,15 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
}
+static DEVICE_ATTR_RO(modalias);
-static struct device_attribute vio_dev_attrs[] = {
- __ATTR_RO(name),
- __ATTR_RO(devspec),
- __ATTR_RO(modalias),
- __ATTR_NULL
+static struct attribute *vio_dev_attrs[] = {
+ &dev_attr_name.attr,
+ &dev_attr_devspec.attr,
+ &dev_attr_modalias.attr,
+ NULL,
};
+ATTRIBUTE_GROUPS(vio_dev);
void vio_unregister_device(struct vio_dev *viodev)
{
@@ -1608,7 +1623,7 @@ static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
struct bus_type vio_bus_type = {
.name = "vio",
- .dev_attrs = vio_dev_attrs,
+ .dev_groups = vio_dev_groups,
.uevent = vio_hotplug,
.match = vio_bus_match,
.probe = vio_bus_probe,
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index a7fe5fee744f..2799706106c6 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -45,6 +45,7 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/pfn_t.h>
+#include <linux/uio.h>
#include <asm/page.h>
#include <asm/prom.h>
@@ -163,8 +164,15 @@ axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pa
return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
}
+static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+ void *addr, size_t bytes, struct iov_iter *i)
+{
+ return copy_from_iter(addr, bytes, i);
+}
+
static const struct dax_operations axon_ram_dax_ops = {
.direct_access = axon_ram_dax_direct_access,
+ .copy_from_iter = axon_ram_copy_from_iter,
};
/**
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/sysdev/mpc8xx_pic.c
index 3e828b20c21e..2842f9d63d21 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/sysdev/mpc8xx_pic.c
@@ -79,7 +79,7 @@ unsigned int mpc8xx_get_irq(void)
irq = in_be32(&siu_reg->sc_sivec) >> 26;
if (irq == PIC_VEC_SPURRIOUS)
- irq = 0;
+ return 0;
return irq_linear_revmap(mpc8xx_pic_host, irq);
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
index ef470b470b04..6afddae2fb47 100644
--- a/arch/powerpc/sysdev/simple_gpio.c
+++ b/arch/powerpc/sysdev/simple_gpio.c
@@ -75,7 +75,8 @@ static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
{
- struct u8_gpio_chip *u8_gc = gpiochip_get_data(&mm_gc->gc);
+ struct u8_gpio_chip *u8_gc =
+ container_of(mm_gc, struct u8_gpio_chip, mm_gc);
u8_gc->data = in_8(mm_gc->regs);
}
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 913825086b8d..6595462b1fc8 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -297,7 +297,7 @@ void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
- out_be64(xd->eoi_mmio, 0);
+ out_be64(xd->eoi_mmio + XIVE_ESB_STORE_EOI, 0);
else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
/*
* The FW told us to call it. This happens for some
@@ -1417,7 +1417,7 @@ bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset,
/* Get ready for interrupts */
xive_setup_cpu();
- pr_info("Interrupt handling intialized with %s backend\n",
+ pr_info("Interrupt handling initialized with %s backend\n",
xive_ops->name);
pr_info("Using priority %d for all interrupts\n", max_prio);
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index ab9ecce61ee5..0f95476b01f6 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -633,8 +633,8 @@ u32 xive_native_alloc_vp_block(u32 max_vcpus)
if (max_vcpus > (1 << order))
order++;
- pr_info("VP block alloc, for max VCPUs %d use order %d\n",
- max_vcpus, order);
+ pr_debug("VP block alloc, for max VCPUs %d use order %d\n",
+ max_vcpus, order);
for (;;) {
rc = opal_xive_alloc_vp_block(order);
diff --git a/arch/powerpc/tools/head_check.sh b/arch/powerpc/tools/head_check.sh
new file mode 100644
index 000000000000..ad9e57209aa4
--- /dev/null
+++ b/arch/powerpc/tools/head_check.sh
@@ -0,0 +1,78 @@
+# Copyright © 2016 IBM Corporation
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+# This script checks the head of a vmlinux for linker stubs that
+# break our placement of fixed-location code for 64-bit.
+
+# based on relocs_check.pl
+# Copyright © 2009 IBM Corporation
+
+# NOTE!
+#
+# If the build dies here, it's likely code in head_64.S/exception-64*.S or
+# nearby, is branching to labels it can't reach directly, which results in the
+# linker inserting branch stubs. This can move code around in ways that break
+# the fixed section calculations (head-64.h). To debug this, disassemble the
+# vmlinux and look for branch stubs (long_branch, plt_branch, etc.) in the
+# fixed section region (0 - 0x8000ish). Check what code is calling those stubs,
+# and perhaps change so a direct branch can reach.
+#
+# A ".linker_stub_catch" section is used to catch some stubs generated by
+# early .text code, which tend to get placed at the start of the section.
+# If there are too many such stubs, they can overflow this section. Expanding
+# it may help (or reducing the number of stub branches).
+#
+# Linker stubs use the TOC pointer, so even if fixed section code could
+# tolerate them being inserted into head code, they can't be allowed in low
+# level entry code (boot, interrupt vectors, etc) until r2 is set up. This
+# could cause the kernel to die in early boot.
+
+# Turn this on if you want more debug output:
+# set -x
+
+if [ $# -lt 2 ]; then
+ echo "$0 [path to nm] [path to vmlinux]" 1>&2
+ exit 1
+fi
+
+# Have Kbuild supply the path to nm so we handle cross compilation.
+nm="$1"
+vmlinux="$2"
+
+# gcc-4.6-era toolchain make _stext an A (absolute) symbol rather than T
+$nm "$vmlinux" | grep -e " [TA] _stext$" -e " t start_first_256B$" -e " a text_start$" -e " t start_text$" -m4 > .tmp_symbols.txt
+
+
+vma=$(cat .tmp_symbols.txt | grep -e " [TA] _stext$" | cut -d' ' -f1)
+
+expected_start_head_addr=$vma
+
+start_head_addr=$(cat .tmp_symbols.txt | grep " t start_first_256B$" | cut -d' ' -f1)
+
+if [ "$start_head_addr" != "$expected_start_head_addr" ]; then
+ echo "ERROR: head code starts at $start_head_addr, should be $expected_start_head_addr"
+ echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option"
+ echo "ERROR: see comments in arch/powerpc/tools/head_check.sh"
+
+ exit 1
+fi
+
+top_vma=$(echo $vma | cut -d'0' -f1)
+
+expected_start_text_addr=$(cat .tmp_symbols.txt | grep " a text_start$" | cut -d' ' -f1 | sed "s/^0/$top_vma/")
+
+start_text_addr=$(cat .tmp_symbols.txt | grep " t start_text$" | cut -d' ' -f1)
+
+if [ "$start_text_addr" != "$expected_start_text_addr" ]; then
+ echo "ERROR: start_text address is $start_text_addr, should be $expected_start_text_addr"
+ echo "ERROR: try to enable LD_HEAD_STUB_CATCH config option"
+ echo "ERROR: see comments in arch/powerpc/tools/head_check.sh"
+
+ exit 1
+fi
+
+rm -f .tmp_symbols.txt
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
new file mode 100755
index 000000000000..1e972df3107e
--- /dev/null
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -0,0 +1,57 @@
+# Copyright © 2016 IBM Corporation
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+#
+# This script checks the relocations of a vmlinux for "suspicious"
+# branches from unrelocated code (head_64.S code).
+
+# Turn this on if you want more debug output:
+# set -x
+
+# Have Kbuild supply the path to objdump so we handle cross compilation.
+objdump="$1"
+vmlinux="$2"
+
+#__end_interrupts should be located within the first 64K
+
+end_intr=0x$(
+"$objdump" -R "$vmlinux" -d --start-address=0xc000000000000000 \
+ --stop-address=0xc000000000010000 |
+grep '\<__end_interrupts>:' |
+awk '{print $1}'
+)
+
+BRANCHES=$(
+"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \
+ --stop-address=${end_intr} |
+grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" |
+grep -v '\<__start_initialization_multiplatform>' |
+grep -v -e 'b.\?.\?ctr' |
+grep -v -e 'b.\?.\?lr' |
+sed 's/://' |
+awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}'
+)
+
+for tuple in $BRANCHES
+do
+ from=`echo $tuple | cut -d':' -f1`
+ branch=`echo $tuple | cut -d':' -f2`
+ to=`echo $tuple | cut -d':' -f3 | sed 's/cr[0-7],//'`
+ sym=`echo $tuple | cut -d':' -f4`
+
+ if (( $to > $end_intr ))
+ then
+ if [ -z "$bad_branches" ]; then
+ echo "WARNING: Unrelocated relative branches"
+ bad_branches="yes"
+ fi
+ echo "$from $branch-> $to $sym"
+ fi
+done
+
+if [ -z "$bad_branches" ]; then
+ exit 0
+fi
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index f11f65634aab..08e367e3e8c3 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -53,6 +53,7 @@
#include <asm/xive.h>
#include <asm/opal.h>
#include <asm/firmware.h>
+#include <asm/code-patching.h>
#ifdef CONFIG_PPC64
#include <asm/hvcall.h>
@@ -837,7 +838,8 @@ static void insert_bpts(void)
store_inst(&bp->instr[0]);
if (bp->enabled & BP_CIABR)
continue;
- if (mwrite(bp->address, &bpinstr, 4) != 4) {
+ if (patch_instruction((unsigned int *)bp->address,
+ bpinstr) != 0) {
printf("Couldn't write instruction at %lx, "
"disabling breakpoint there\n", bp->address);
bp->enabled &= ~BP_TRAP;
@@ -874,7 +876,8 @@ static void remove_bpts(void)
continue;
if (mread(bp->address, &instr, 4) == 4
&& instr == bpinstr
- && mwrite(bp->address, &bp->instr, 4) != 4)
+ && patch_instruction(
+ (unsigned int *)bp->address, bp->instr[0]) != 0)
printf("Couldn't remove breakpoint at %lx\n",
bp->address);
else
@@ -1242,14 +1245,14 @@ bpt_cmds(void)
{
int cmd;
unsigned long a;
- int mode, i;
+ int i;
struct bpt *bp;
- const char badaddr[] = "Only kernel addresses are permitted "
- "for breakpoints\n";
cmd = inchar();
switch (cmd) {
-#ifndef CONFIG_8xx
+#ifndef CONFIG_PPC_8xx
+ static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
+ int mode;
case 'd': /* bd - hardware data breakpoint */
mode = 7;
cmd = inchar();