aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile4
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/btext.c4
-rw-r--r--arch/powerpc/kernel/eeh.c11
-rw-r--r--arch/powerpc/kernel/eeh_cache.c5
-rw-r--r--arch/powerpc/kernel/entry_32.S3
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S88
-rw-r--r--arch/powerpc/kernel/fadump.c10
-rw-r--r--arch/powerpc/kernel/head_32.S23
-rw-r--r--arch/powerpc/kernel/head_64.S10
-rw-r--r--arch/powerpc/kernel/head_8xx.S16
-rw-r--r--arch/powerpc/kernel/idle.c2
-rw-r--r--arch/powerpc/kernel/idle_6xx.S2
-rw-r--r--arch/powerpc/kernel/idle_book3s.S148
-rw-r--r--arch/powerpc/kernel/iommu.c4
-rw-r--r--arch/powerpc/kernel/kprobes.c3
-rw-r--r--arch/powerpc/kernel/kvm.c2
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S6
-rw-r--r--arch/powerpc/kernel/machine_kexec.c15
-rw-r--r--arch/powerpc/kernel/paca.c4
-rw-r--r--arch/powerpc/kernel/pci-common.c10
-rw-r--r--arch/powerpc/kernel/process.c8
-rw-r--r--arch/powerpc/kernel/prom.c2
-rw-r--r--arch/powerpc/kernel/prom_init.c14
-rw-r--r--arch/powerpc/kernel/ptrace.c21
-rw-r--r--arch/powerpc/kernel/rtas.c168
-rw-r--r--arch/powerpc/kernel/security.c5
-rw-r--r--arch/powerpc/kernel/setup-common.c4
-rw-r--r--arch/powerpc/kernel/setup.h6
-rw-r--r--arch/powerpc/kernel/setup_64.c124
-rw-r--r--arch/powerpc/kernel/smp.c49
-rw-r--r--arch/powerpc/kernel/stacktrace.c28
-rw-r--r--arch/powerpc/kernel/swsusp_32.S2
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S2
-rw-r--r--arch/powerpc/kernel/sysfs.c42
-rw-r--r--arch/powerpc/kernel/tau_6xx.c147
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c15
-rw-r--r--arch/powerpc/kernel/traps.c7
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S42
-rw-r--r--arch/powerpc/kernel/watchdog.c41
40 files changed, 747 insertions, 352 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 59260eb96291..5819a577d267 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -13,6 +13,7 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
@@ -181,6 +182,9 @@ KCOV_INSTRUMENT_cputable.o := n
KCOV_INSTRUMENT_setup_64.o := n
KCOV_INSTRUMENT_paca.o := n
+CFLAGS_setup_64.o += -fno-stack-protector
+CFLAGS_paca.o += -fno-stack-protector
+
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 5c0a1e17219b..af399675248e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -285,7 +285,7 @@ int main(void)
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
- DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
+ DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS);
STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 6dfceaa820e4..b0e0b3cd91ee 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -250,8 +250,10 @@ int __init btext_find_display(int allow_nonstdout)
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
- if (rc == 0)
+ if (rc == 0) {
+ of_node_put(np);
break;
+ }
}
return rc;
}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c35069294ecf..8b0e523b2abb 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -368,14 +368,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
pa = pte_pfn(*ptep);
/* On radix we can do hugepage mappings for io, so handle that */
- if (hugepage_shift) {
- pa <<= hugepage_shift;
- pa |= token & ((1ul << hugepage_shift) - 1);
- } else {
- pa <<= PAGE_SHIFT;
- pa |= token & (PAGE_SIZE - 1);
- }
+ if (!hugepage_shift)
+ hugepage_shift = PAGE_SHIFT;
+ pa <<= PAGE_SHIFT;
+ pa |= token & ((1ul << hugepage_shift) - 1);
return pa;
}
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index cf11277ebd02..000ebb5a6fb3 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -272,8 +272,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
{
struct pci_io_addr_range *piar;
struct rb_node *n;
+ unsigned long flags;
- spin_lock(&pci_io_addr_cache_root.piar_lock);
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
@@ -281,7 +282,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
}
- spin_unlock(&pci_io_addr_cache_root.piar_lock);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
return 0;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index f29bb176381f..c72894ff9d61 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -336,6 +336,9 @@ trace_syscall_entry_irq_off:
.globl transfer_to_syscall
transfer_to_syscall:
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_lock r11, r12
+#endif
#ifdef CONFIG_TRACE_IRQFLAGS
andi. r12,r9,MSR_EE
beq- trace_syscall_entry_irq_off
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 70ac8a6ba0c1..88bba0a931d6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1150,7 +1150,7 @@ EXC_REAL_BEGIN(data_access, 0x300, 0x80)
INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
EXC_REAL_END(data_access, 0x300, 0x80)
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
+ INT_HANDLER data_access, 0x300, ool=1, virt=1, dar=1, dsisr=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
@@ -1205,7 +1205,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
- INT_HANDLER instruction_access, 0x400, kvm=1
+ INT_HANDLER instruction_access, 0x400, ool=1, kvm=1
EXC_REAL_END(instruction_access, 0x400, 0x80)
EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
INT_HANDLER instruction_access, 0x400, virt=1
@@ -1225,7 +1225,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+ INT_HANDLER instruction_access_slb, 0x480, ool=1, area=PACA_EXSLB, kvm=1
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
@@ -1365,17 +1365,17 @@ EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
EXC_REAL_END(decrementer, 0x900, 0x80)
EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
- INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+ INT_HANDLER decrementer, 0x900, ool=1, virt=1, bitmask=IRQS_DISABLED
EXC_VIRT_END(decrementer, 0x4900, 0x80)
INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
- INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+ INT_HANDLER hdecrementer, 0x980, ool=1, hsrr=EXC_HV, kvm=1
EXC_REAL_END(hdecrementer, 0x980, 0x80)
EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
- INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+ INT_HANDLER hdecrementer, 0x980, ool=1, virt=1, hsrr=EXC_HV, kvm=1
EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
@@ -2046,15 +2046,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
.endr
blr
-TRAMP_REAL_BEGIN(rfi_flush_fallback)
- SET_SCRATCH0(r13);
- GET_PACA(r13);
- std r1,PACA_EXRFI+EX_R12(r13)
- ld r1,PACAKSAVE(r13)
- std r9,PACA_EXRFI+EX_R9(r13)
- std r10,PACA_EXRFI+EX_R10(r13)
- std r11,PACA_EXRFI+EX_R11(r13)
- mfctr r9
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
@@ -2065,7 +2058,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
sync
/*
- * The load adresses are at staggered offsets within cachelines,
+ * The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
@@ -2080,7 +2073,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
+.endm
+
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ blr
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r1,PACA_EXRFI+EX_R12(r13)
+ ld r1,PACAKSAVE(r13)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -2098,32 +2114,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
- ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
- ld r11,PACA_L1D_FLUSH_SIZE(r13)
- srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
- mtctr r11
- DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
-
- /* order ld/st prior to dcbt stop all streams with flushing */
- sync
-
- /*
- * The load adresses are at staggered offsets within cachelines,
- * which suits some pipelines better (on others it should not
- * hurt).
- */
-1:
- ld r11,(0x80 + 8)*0(r10)
- ld r11,(0x80 + 8)*1(r10)
- ld r11,(0x80 + 8)*2(r10)
- ld r11,(0x80 + 8)*3(r10)
- ld r11,(0x80 + 8)*4(r10)
- ld r11,(0x80 + 8)*5(r10)
- ld r11,(0x80 + 8)*6(r10)
- ld r11,(0x80 + 8)*7(r10)
- addi r10,r10,0x80*8
- bdnz 1b
-
+ L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
@@ -2132,6 +2123,19 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
GET_SCRATCH0(r13);
hrfid
+USE_TEXT_SECTION()
+
+_GLOBAL(do_uaccess_flush)
+ UACCESS_FLUSH_FIXUP_SECTION
+ nop
+ nop
+ nop
+ blr
+ L1D_DISPLACEMENT_FLUSH
+ blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
+
/*
* Real mode exceptions actually use this too, but alternate
* instruction code patches (which end up in the common .text area)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 9b522152d8f0..69d64f406204 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -279,7 +279,7 @@ static void fadump_show_config(void)
* that is required for a kernel to boot successfully.
*
*/
-static inline u64 fadump_calculate_reserve_size(void)
+static __init u64 fadump_calculate_reserve_size(void)
{
u64 base, size, bootmem_min;
int ret;
@@ -835,7 +835,6 @@ static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
sizeof(struct fadump_memory_range));
return 0;
}
-
static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
u64 base, u64 end)
{
@@ -854,7 +853,12 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
- if ((start + size) == base)
+ /*
+ * Boot memory area needs separate PT_LOAD segment(s) as it
+ * is moved to a different location at the time of crash.
+ * So, fold only if the region is not boot memory area.
+ */
+ if ((start + size) == base && start >= fw_dump.boot_mem_top)
is_adjacent = true;
}
if (!is_adjacent) {
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 4a24f8f026c7..edaab1142498 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -418,14 +418,11 @@ InstructionTLBMiss:
cmplw 0,r1,r3
#endif
mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
- li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
-#else
- li r1,_PAGE_PRESENT | _PAGE_EXEC
-#endif
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER
#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
#endif
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
@@ -484,13 +481,10 @@ DataLoadTLBMiss:
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
- li r1, _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_PRESENT
-#endif
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
@@ -564,13 +558,10 @@ DataStoreTLBMiss:
lis r1,PAGE_OFFSET@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
- li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
-#endif
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
@@ -843,7 +834,7 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
-load_segment_registers:
+_GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 780f527eabd2..9019f1395d39 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -420,6 +420,10 @@ generic_secondary_common_init:
/* From now on, r24 is expected to be logical cpuid */
mr r24,r5
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_OVERHEAD
+
/* See if we need to call a cpu state restore handler */
LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
@@ -448,10 +452,6 @@ generic_secondary_common_init:
sync /* order paca.run and cur_cpu_spec */
isync /* In case code patching happened */
- /* Create a temp kernel stack for use before relocation is on. */
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
-
b __secondary_start
#endif /* SMP */
@@ -992,7 +992,7 @@ start_here_common:
bl start_kernel
/* Not reached */
- trap
+0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 98d8b6832fcb..6f3e417f55a3 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -191,7 +191,7 @@ SystemCall:
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD)
+ EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD)
/* Called from DataStoreTLBMiss when perf TLB misses events are activated */
#ifdef CONFIG_PERF_EVENTS
@@ -229,9 +229,7 @@ SystemCall:
InstructionTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mtspr SPRN_SPRG_SCRATCH1, r11
-#endif
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
@@ -278,11 +276,9 @@ InstructionTLBMiss:
#ifdef ITLB_MISS_KERNEL
mtcr r11
#endif
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ rlwinm r11, r10, 32-7, _PAGE_PRESENT
and r11, r11, r10
rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
* Software indicator bits 22, 24, 25, 26, and 27 must be
@@ -296,9 +292,7 @@ InstructionTLBMiss:
/* Restore registers */
0: mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
rfi
patch_site 0b, patch__itlbmiss_exit_1
@@ -308,9 +302,7 @@ InstructionTLBMiss:
addi r10, r10, 1
stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
rfi
#endif
@@ -394,11 +386,9 @@ DataStoreTLBMiss:
* r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
* r10 = (r10 & ~PRESENT) | r11;
*/
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
+ rlwinm r11, r10, 32-7, _PAGE_PRESENT
and r11, r11, r10
rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index a36fd053c3db..0615ba86baef 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -37,7 +37,7 @@ static int __init powersave_off(char *arg)
{
ppc_md.power_save = NULL;
cpuidle_disable = IDLE_POWERSAVE_OFF;
- return 0;
+ return 1;
}
__setup("powersave=off", powersave_off);
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 0ffdd18b9f26..acb8215c5a01 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -129,7 +129,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
mtspr SPRN_HID0,r4
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index d32751994a62..c3f62c9ce740 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -50,28 +50,32 @@ _GLOBAL(isa300_idle_stop_mayloss)
std r1,PACAR1(r13)
mflr r4
mfcr r5
- /* use stack red zone rather than a new frame for saving regs */
- std r2,-8*0(r1)
- std r14,-8*1(r1)
- std r15,-8*2(r1)
- std r16,-8*3(r1)
- std r17,-8*4(r1)
- std r18,-8*5(r1)
- std r19,-8*6(r1)
- std r20,-8*7(r1)
- std r21,-8*8(r1)
- std r22,-8*9(r1)
- std r23,-8*10(r1)
- std r24,-8*11(r1)
- std r25,-8*12(r1)
- std r26,-8*13(r1)
- std r27,-8*14(r1)
- std r28,-8*15(r1)
- std r29,-8*16(r1)
- std r30,-8*17(r1)
- std r31,-8*18(r1)
- std r4,-8*19(r1)
- std r5,-8*20(r1)
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
/* 168 bytes */
PPC_STOP
b . /* catch bugs */
@@ -87,8 +91,8 @@ _GLOBAL(isa300_idle_stop_mayloss)
*/
_GLOBAL(idle_return_gpr_loss)
ld r1,PACAR1(r13)
- ld r4,-8*19(r1)
- ld r5,-8*20(r1)
+ ld r4,-8*20(r1)
+ ld r5,-8*21(r1)
mtlr r4
mtcr r5
/*
@@ -96,38 +100,40 @@ _GLOBAL(idle_return_gpr_loss)
* from PACATOC. This could be avoided for that less common case
* if KVM saved its r2.
*/
- ld r2,-8*0(r1)
- ld r14,-8*1(r1)
- ld r15,-8*2(r1)
- ld r16,-8*3(r1)
- ld r17,-8*4(r1)
- ld r18,-8*5(r1)
- ld r19,-8*6(r1)
- ld r20,-8*7(r1)
- ld r21,-8*8(r1)
- ld r22,-8*9(r1)
- ld r23,-8*10(r1)
- ld r24,-8*11(r1)
- ld r25,-8*12(r1)
- ld r26,-8*13(r1)
- ld r27,-8*14(r1)
- ld r28,-8*15(r1)
- ld r29,-8*16(r1)
- ld r30,-8*17(r1)
- ld r31,-8*18(r1)
+ ld r2,-8*1(r1)
+ ld r14,-8*2(r1)
+ ld r15,-8*3(r1)
+ ld r16,-8*4(r1)
+ ld r17,-8*5(r1)
+ ld r18,-8*6(r1)
+ ld r19,-8*7(r1)
+ ld r20,-8*8(r1)
+ ld r21,-8*9(r1)
+ ld r22,-8*10(r1)
+ ld r23,-8*11(r1)
+ ld r24,-8*12(r1)
+ ld r25,-8*13(r1)
+ ld r26,-8*14(r1)
+ ld r27,-8*15(r1)
+ ld r28,-8*16(r1)
+ ld r29,-8*17(r1)
+ ld r30,-8*18(r1)
+ ld r31,-8*19(r1)
blr
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
- *
- * The 0(r1) slot is used to save r2 in isa206, so use that here.
+ * We have to store a GPR somewhere, ptesync, then reload it, and create
+ * a false dependency on the result of the load. It doesn't matter which
+ * GPR we store, or where we store it. We have already stored r2 to the
+ * stack at -8(r1) in isa206_idle_insn_mayloss, so use that.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r2,0(r1); \
+ std r2,-8(r1); \
ptesync; \
- ld r2,0(r1); \
+ ld r2,-8(r1); \
236: cmpd cr0,r2,r2; \
bne 236b; \
IDLE_INST; \
@@ -152,28 +158,32 @@ _GLOBAL(isa206_idle_insn_mayloss)
std r1,PACAR1(r13)
mflr r4
mfcr r5
- /* use stack red zone rather than a new frame for saving regs */
- std r2,-8*0(r1)
- std r14,-8*1(r1)
- std r15,-8*2(r1)
- std r16,-8*3(r1)
- std r17,-8*4(r1)
- std r18,-8*5(r1)
- std r19,-8*6(r1)
- std r20,-8*7(r1)
- std r21,-8*8(r1)
- std r22,-8*9(r1)
- std r23,-8*10(r1)
- std r24,-8*11(r1)
- std r25,-8*12(r1)
- std r26,-8*13(r1)
- std r27,-8*14(r1)
- std r28,-8*15(r1)
- std r29,-8*16(r1)
- std r30,-8*17(r1)
- std r31,-8*18(r1)
- std r4,-8*19(r1)
- std r5,-8*20(r1)
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
cmpwi r3,PNV_THREAD_NAP
bne 1f
IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 9704f3f76e63..d7d42bd448c4 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1057,7 +1057,7 @@ int iommu_take_ownership(struct iommu_table *tbl)
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
- spin_lock(&tbl->pools[i].lock);
+ spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
iommu_table_release_pages(tbl);
@@ -1085,7 +1085,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
- spin_lock(&tbl->pools[i].lock);
+ spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock);
memset(tbl->it_map, 0, sz);
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9b340af02c38..dd01a4abc258 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -264,7 +264,8 @@ int kprobe_handler(struct pt_regs *regs)
if (user_mode(regs))
return 0;
- if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))
+ if (!IS_ENABLED(CONFIG_BOOKE) &&
+ (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)))
return 0;
/*
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 617eba82531c..d89cf802d9aa 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -669,7 +669,7 @@ static void __init kvm_use_magic_page(void)
on_each_cpu(kvm_map_magic_page, &features, 1);
/* Quick self-test to see if the mapping works */
- if (!fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
+ if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
kvm_patching_worked = false;
return;
}
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 2020d255585f..7684f644e93e 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
/* Stop DST streams */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -293,7 +293,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
isync
/* Stop DST streams */
- DSSALL
+ PPC_DSSALL
sync
/* Get the current enable bit of the L3CR into r4 */
@@ -402,7 +402,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
_GLOBAL(__flush_disable_L1)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 7a1c11a7cba5..716f8bb17461 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -146,11 +146,18 @@ void __init reserve_crashkernel(void)
if (!crashk_res.start) {
#ifdef CONFIG_PPC64
/*
- * On 64bit we split the RMO in half but cap it at half of
- * a small SLB (128MB) since the crash kernel needs to place
- * itself and some stacks to be in the first segment.
+ * On the LPAR platform place the crash kernel to mid of
+ * RMA size (512MB or more) to ensure the crash kernel
+ * gets enough space to place itself and some stack to be
+ * in the first segment. At the same time normal kernel
+ * also get enough space to allocate memory for essential
+ * system resource in the first segment. Keep the crash
+ * kernel starts at 128MB offset on other platforms.
*/
- crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ crashk_res.start = ppc64_rma_size / 2;
+ else
+ crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
#else
crashk_res.start = KDUMP_KERNELBASE;
#endif
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 4ea0cca52e16..c786adfb9413 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -176,7 +176,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
struct paca_struct **paca_ptrs __read_mostly;
EXPORT_SYMBOL(paca_ptrs);
-void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int cpu)
+void __init initialise_paca(struct paca_struct *new_paca, int cpu)
{
#ifdef CONFIG_PPC_PSERIES
new_paca->lppaca_ptr = NULL;
@@ -205,7 +205,7 @@ void __init __nostackprotector initialise_paca(struct paca_struct *new_paca, int
}
/* Put the paca pointer into r13 and SPRG_PACA */
-void __nostackprotector setup_paca(struct paca_struct *new_paca)
+void setup_paca(struct paca_struct *new_paca)
{
/* Setup r13 */
local_paca = new_paca;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 1c448cf25506..a2c258a8d736 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1669,3 +1669,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
+
+
+static int __init discover_phbs(void)
+{
+ if (ppc_md.discover_phbs)
+ ppc_md.discover_phbs();
+
+ return 0;
+}
+core_initcall(discover_phbs);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bd0c258a1d5d..cf87573e6e78 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1719,7 +1719,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
tm_reclaim_current(0);
#endif
- memset(regs->gpr, 0, sizeof(regs->gpr));
+ memset(&regs->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0]));
regs->ctr = 0;
regs->link = 0;
regs->xer = 0;
@@ -2001,12 +2001,12 @@ static unsigned long __get_wchan(struct task_struct *p)
return 0;
do {
- sp = *(unsigned long *)sp;
+ sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
p->state == TASK_RUNNING)
return 0;
if (count > 0) {
- ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
+ ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
if (!in_sched_functions(ip))
return ip;
}
@@ -2081,7 +2081,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
* See if this is an exception frame.
* We look for the "regshere" marker in the current frame.
*/
- if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
+ if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS)
&& stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
struct pt_regs *regs = (struct pt_regs *)
(sp + STACK_FRAME_OVERHEAD);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index e13e96e665e0..537142b877b8 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -266,7 +266,7 @@ static struct feature_property {
};
#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
-static inline void identical_pvr_fixup(unsigned long node)
+static __init void identical_pvr_fixup(unsigned long node)
{
unsigned int pvr;
const char *model = of_get_flat_dt_prop(node, "model", NULL);
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 689664cd4e79..7f4e2c031a9a 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1305,14 +1305,10 @@ static void __init prom_check_platform_support(void)
if (prop_len > sizeof(vec))
prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
prop_len);
- prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
- &vec, sizeof(vec));
- for (i = 0; i < sizeof(vec); i += 2) {
- prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
- , vec[i]
- , vec[i + 1]);
- prom_parse_platform_support(vec[i], vec[i + 1],
- &supported);
+ prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec));
+ for (i = 0; i < prop_len; i += 2) {
+ prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]);
+ prom_parse_platform_support(vec[i], vec[i + 1], &supported);
}
}
@@ -2923,7 +2919,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
/* Check if the phy-handle property exists - bail if it does */
rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
- if (!rv)
+ if (rv <= 0)
return;
/*
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 8c92febf5f44..63bfc5250b67 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3014,8 +3014,13 @@ long arch_ptrace(struct task_struct *child, long request,
flush_fp_to_thread(child);
if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&tmp, &child->thread.TS_FPR(fpidx),
- sizeof(long));
+ if (IS_ENABLED(CONFIG_PPC32)) {
+ // On 32-bit the index we are passed refers to 32-bit words
+ tmp = ((u32 *)child->thread.fp_state.fpr)[fpidx];
+ } else {
+ memcpy(&tmp, &child->thread.TS_FPR(fpidx),
+ sizeof(long));
+ }
else
tmp = child->thread.fp_state.fpscr;
}
@@ -3047,8 +3052,13 @@ long arch_ptrace(struct task_struct *child, long request,
flush_fp_to_thread(child);
if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&child->thread.TS_FPR(fpidx), &data,
- sizeof(long));
+ if (IS_ENABLED(CONFIG_PPC32)) {
+ // On 32-bit the index we are passed refers to 32-bit words
+ ((u32 *)child->thread.fp_state.fpr)[fpidx] = data;
+ } else {
+ memcpy(&child->thread.TS_FPR(fpidx), &data,
+ sizeof(long));
+ }
else
child->thread.fp_state.fpscr = data;
ret = 0;
@@ -3398,4 +3408,7 @@ void __init pt_regs_check(void)
offsetof(struct user_pt_regs, result));
BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+ // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 01210593d60c..35e246e39705 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -940,6 +940,156 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
return NULL;
}
+#ifdef CONFIG_PPC_RTAS_FILTER
+
+/*
+ * The sys_rtas syscall, as originally designed, allows root to pass
+ * arbitrary physical addresses to RTAS calls. A number of RTAS calls
+ * can be abused to write to arbitrary memory and do other things that
+ * are potentially harmful to system integrity, and thus should only
+ * be used inside the kernel and not exposed to userspace.
+ *
+ * All known legitimate users of the sys_rtas syscall will only ever
+ * pass addresses that fall within the RMO buffer, and use a known
+ * subset of RTAS calls.
+ *
+ * Accordingly, we filter RTAS requests to check that the call is
+ * permitted, and that provided pointers fall within the RMO buffer.
+ * The rtas_filters list contains an entry for each permitted call,
+ * with the indexes of the parameters which are expected to contain
+ * addresses and sizes of buffers allocated inside the RMO buffer.
+ */
+struct rtas_filter {
+ const char *name;
+ int token;
+ /* Indexes into the args buffer, -1 if not used */
+ int buf_idx1;
+ int size_idx1;
+ int buf_idx2;
+ int size_idx2;
+
+ int fixed_size;
+};
+
+static struct rtas_filter rtas_filters[] __ro_after_init = {
+ { "ibm,activate-firmware", -1, -1, -1, -1, -1 },
+ { "ibm,configure-connector", -1, 0, -1, 1, -1, 4096 }, /* Special cased */
+ { "display-character", -1, -1, -1, -1, -1 },
+ { "ibm,display-message", -1, 0, -1, -1, -1 },
+ { "ibm,errinjct", -1, 2, -1, -1, -1, 1024 },
+ { "ibm,close-errinjct", -1, -1, -1, -1, -1 },
+ { "ibm,open-errinjct", -1, -1, -1, -1, -1 },
+ { "ibm,get-config-addr-info2", -1, -1, -1, -1, -1 },
+ { "ibm,get-dynamic-sensor-state", -1, 1, -1, -1, -1 },
+ { "ibm,get-indices", -1, 2, 3, -1, -1 },
+ { "get-power-level", -1, -1, -1, -1, -1 },
+ { "get-sensor-state", -1, -1, -1, -1, -1 },
+ { "ibm,get-system-parameter", -1, 1, 2, -1, -1 },
+ { "get-time-of-day", -1, -1, -1, -1, -1 },
+ { "ibm,get-vpd", -1, 0, -1, 1, 2 },
+ { "ibm,lpar-perftools", -1, 2, 3, -1, -1 },
+ { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */
+ { "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 },
+ { "ibm,scan-log-dump", -1, 0, 1, -1, -1 },
+ { "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 },
+ { "ibm,set-eeh-option", -1, -1, -1, -1, -1 },
+ { "set-indicator", -1, -1, -1, -1, -1 },
+ { "set-power-level", -1, -1, -1, -1, -1 },
+ { "set-time-for-power-on", -1, -1, -1, -1, -1 },
+ { "ibm,set-system-parameter", -1, 1, -1, -1, -1 },
+ { "set-time-of-day", -1, -1, -1, -1, -1 },
+ { "ibm,suspend-me", -1, -1, -1, -1, -1 },
+ { "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 },
+ { "ibm,update-properties", -1, 0, -1, -1, -1, 4096 },
+ { "ibm,physical-attestation", -1, 0, 1, -1, -1 },
+};
+
+static bool in_rmo_buf(u32 base, u32 end)
+{
+ return base >= rtas_rmo_buf &&
+ base < (rtas_rmo_buf + RTAS_RMOBUF_MAX) &&
+ base <= end &&
+ end >= rtas_rmo_buf &&
+ end < (rtas_rmo_buf + RTAS_RMOBUF_MAX);
+}
+
+static bool block_rtas_call(int token, int nargs,
+ struct rtas_args *args)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
+ struct rtas_filter *f = &rtas_filters[i];
+ u32 base, size, end;
+
+ if (token != f->token)
+ continue;
+
+ if (f->buf_idx1 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx1]);
+ if (f->size_idx1 != -1)
+ size = be32_to_cpu(args->args[f->size_idx1]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,platform-dump - NULL buffer
+ * address is used to indicate end of dump processing
+ */
+ if (!strcmp(f->name, "ibm,platform-dump") &&
+ base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ if (f->buf_idx2 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx2]);
+ if (f->size_idx2 != -1)
+ size = be32_to_cpu(args->args[f->size_idx2]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,configure-connector where the
+ * address can be 0
+ */
+ if (!strcmp(f->name, "ibm,configure-connector") &&
+ base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ return false;
+ }
+
+err:
+ pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
+ pr_err_ratelimited("sys_rtas: token=0x%x, nargs=%d (called by %s)\n",
+ token, nargs, current->comm);
+ return true;
+}
+
+#else
+
+static bool block_rtas_call(int token, int nargs,
+ struct rtas_args *args)
+{
+ return false;
+}
+
+#endif /* CONFIG_PPC_RTAS_FILTER */
+
/* We assume to be passed big endian arguments */
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
@@ -977,6 +1127,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
+ if (block_rtas_call(token, nargs, &args))
+ return -EINVAL;
+
/* Need to handle ibm,suspend_me call specially */
if (token == ibm_suspend_me_token) {
@@ -1038,6 +1191,9 @@ void __init rtas_initialize(void)
unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
u32 base, size, entry;
int no_base, no_size, no_entry;
+#ifdef CONFIG_PPC_RTAS_FILTER
+ int i;
+#endif
/* Get RTAS dev node and fill up our "rtas" structure with infos
* about it.
@@ -1077,6 +1233,12 @@ void __init rtas_initialize(void)
#ifdef CONFIG_RTAS_ERROR_LOGGING
rtas_last_error_token = rtas_token("rtas-last-error");
#endif
+
+#ifdef CONFIG_PPC_RTAS_FILTER
+ for (i = 0; i < ARRAY_SIZE(rtas_filters); i++) {
+ rtas_filters[i].token = rtas_token(rtas_filters[i].name);
+ }
+#endif
}
int __init early_init_dt_scan_rtas(unsigned long node,
@@ -1091,6 +1253,12 @@ int __init early_init_dt_scan_rtas(unsigned long node,
entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
sizep = of_get_flat_dt_prop(node, "rtas-size", NULL);
+#ifdef CONFIG_PPC64
+ /* need this feature to decide the crashkernel offset */
+ if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL))
+ powerpc_firmware_features |= FW_FEATURE_LPAR;
+#endif
+
if (basep && entryp && sizep) {
rtas.base = *basep;
rtas.entry = *entryp;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 1740a66cea84..ff022e725693 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -256,6 +256,11 @@ static int __init handle_no_stf_barrier(char *p)
early_param("no_stf_barrier", handle_no_stf_barrier);
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+ return stf_enabled_flush_types;
+}
+
/* This is the generic flag used by other architectures */
static int __init handle_ssbd(char *p)
{
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 25aaa3903000..f281d011f4b9 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -903,8 +903,6 @@ void __init setup_arch(char **cmdline_p)
/* On BookE, setup per-core TLB data structures. */
setup_tlb_core_data();
-
- smp_release_cpus();
#endif
/* Print various info about the machine that has been gathered so far. */
@@ -925,6 +923,8 @@ void __init setup_arch(char **cmdline_p)
exc_lvl_early_init();
emergency_stack_init();
+ smp_release_cpus();
+
initmem_init();
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 1b02d338a5f5..c82577c4b15d 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -8,12 +8,6 @@
#ifndef __ARCH_POWERPC_KERNEL_SETUP_H
#define __ARCH_POWERPC_KERNEL_SETUP_H
-#ifdef CONFIG_CC_IS_CLANG
-#define __nostackprotector
-#else
-#define __nostackprotector __attribute__((__optimize__("no-stack-protector")))
-#endif
-
void initialize_cache_info(void);
void irqstack_early_init(void);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e50fbed36651..5bc7e753df4d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -284,7 +284,7 @@ void __init record_spr_defaults(void)
* device-tree is not accessible via normal means at this point.
*/
-void __init __nostackprotector early_setup(unsigned long dt_ptr)
+void __init early_setup(unsigned long dt_ptr)
{
static __initdata struct paca_struct boot_paca;
@@ -859,7 +859,13 @@ early_initcall(disable_hardlockup_detector);
static enum l1d_flush_type enabled_flush_types;
static void *l1d_flush_fallback_area;
static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
bool rfi_flush;
+bool entry_flush;
+bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
static int __init handle_no_rfi_flush(char *p)
{
@@ -869,6 +875,22 @@ static int __init handle_no_rfi_flush(char *p)
}
early_param("no_rfi_flush", handle_no_rfi_flush);
+static int __init handle_no_entry_flush(char *p)
+{
+ pr_info("entry-flush: disabled on command line.");
+ no_entry_flush = true;
+ return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+static int __init handle_no_uaccess_flush(char *p)
+{
+ pr_info("uaccess-flush: disabled on command line.");
+ no_uaccess_flush = true;
+ return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
/*
* The RFI flush is not KPTI, but because users will see doco that says to use
* nopti we hijack that option here to also disable the RFI flush.
@@ -900,6 +922,32 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
+void entry_flush_enable(bool enable)
+{
+ if (enable) {
+ do_entry_flush_fixups(enabled_flush_types);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ do_entry_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ entry_flush = enable;
+}
+
+void uaccess_flush_enable(bool enable)
+{
+ if (enable) {
+ do_uaccess_flush_fixups(enabled_flush_types);
+ static_branch_enable(&uaccess_flush_key);
+ on_each_cpu(do_nothing, NULL, 1);
+ } else {
+ static_branch_disable(&uaccess_flush_key);
+ do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+ }
+
+ uaccess_flush = enable;
+}
+
static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
@@ -958,10 +1006,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
enabled_flush_types = types;
- if (!no_rfi_flush && !cpu_mitigations_off())
+ if (!cpu_mitigations_off() && !no_rfi_flush)
rfi_flush_enable(enable);
}
+void setup_entry_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_entry_flush)
+ entry_flush_enable(enable);
+}
+
+void setup_uaccess_flush(bool enable)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!no_uaccess_flush)
+ uaccess_flush_enable(enable);
+}
+
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
@@ -989,9 +1055,63 @@ static int rfi_flush_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+static int entry_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != entry_flush)
+ entry_flush_enable(enable);
+
+ return 0;
+}
+
+static int entry_flush_get(void *data, u64 *val)
+{
+ *val = entry_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+static int uaccess_flush_set(void *data, u64 val)
+{
+ bool enable;
+
+ if (val == 1)
+ enable = true;
+ else if (val == 0)
+ enable = false;
+ else
+ return -EINVAL;
+
+ /* Only do anything if we're changing state */
+ if (enable != uaccess_flush)
+ uaccess_flush_enable(enable);
+
+ return 0;
+}
+
+static int uaccess_flush_get(void *data, u64 *val)
+{
+ *val = uaccess_flush ? 1 : 0;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
static __init int rfi_flush_debugfs_init(void)
{
debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
+ debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
+ debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
return 0;
}
device_initcall(rfi_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ea6adbf6a221..4de63ec2e155 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -583,11 +583,43 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
#endif
#ifdef CONFIG_NMI_IPI
+static void crash_stop_this_cpu(struct pt_regs *regs)
+#else
+static void crash_stop_this_cpu(void *dummy)
+#endif
+{
+ /*
+ * Just busy wait here and avoid marking CPU as offline to ensure
+ * register data is captured appropriately.
+ */
+ while (1)
+ cpu_relax();
+}
+
+void crash_smp_send_stop(void)
+{
+ static bool stopped = false;
+
+ if (stopped)
+ return;
+
+ stopped = true;
+
+#ifdef CONFIG_NMI_IPI
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000);
+#else
+ smp_call_function(crash_stop_this_cpu, NULL, 0);
+#endif /* CONFIG_NMI_IPI */
+}
+
+#ifdef CONFIG_NMI_IPI
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
*/
+ set_cpu_online(smp_processor_id(), false);
+
spin_begin();
while (1)
spin_cpu_relax();
@@ -603,6 +635,15 @@ void smp_send_stop(void)
static void stop_this_cpu(void *dummy)
{
hard_irq_disable();
+
+ /*
+ * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
+ * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
+ * to know other CPUs are offline before it breaks locks to flush
+ * printk buffers, in case we panic()ed while holding the lock.
+ */
+ set_cpu_online(smp_processor_id(), false);
+
spin_begin();
while (1)
spin_cpu_relax();
@@ -1254,6 +1295,9 @@ void start_secondary(void *unused)
vdso_getcpu_init();
#endif
+ set_numa_node(numa_cpu_lookup_table[cpu]);
+ set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
@@ -1266,9 +1310,6 @@ void start_secondary(void *unused)
if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
shared_caches = true;
- set_numa_node(numa_cpu_lookup_table[cpu]);
- set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
-
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
@@ -1285,10 +1326,12 @@ void start_secondary(void *unused)
BUG();
}
+#ifdef CONFIG_PROFILING
int setup_profiling_timer(unsigned int multiplier)
{
return 0;
}
+#endif
#ifdef CONFIG_SCHED_SMT
/* cpumask of CPUs with asymetric SMT dependancy */
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e2a46cfed5fd..890f95151fb4 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -8,6 +8,7 @@
* Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
*/
+#include <linux/delay.h>
#include <linux/export.h>
#include <linux/kallsyms.h>
#include <linux/module.h>
@@ -19,6 +20,7 @@
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <linux/ftrace.h>
+#include <linux/delay.h>
#include <asm/kprobes.h>
#include <asm/paca.h>
@@ -230,17 +232,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs)
static void raise_backtrace_ipi(cpumask_t *mask)
{
+ struct paca_struct *p;
unsigned int cpu;
+ u64 delay_us;
for_each_cpu(cpu, mask) {
- if (cpu == smp_processor_id())
+ if (cpu == smp_processor_id()) {
handle_backtrace_ipi(NULL);
- else
- smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
- }
+ continue;
+ }
- for_each_cpu(cpu, mask) {
- struct paca_struct *p = paca_ptrs[cpu];
+ delay_us = 5 * USEC_PER_SEC;
+
+ if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
+ // Now wait up to 5s for the other CPU to do its backtrace
+ while (cpumask_test_cpu(cpu, mask) && delay_us) {
+ udelay(1);
+ delay_us--;
+ }
+
+ // Other CPU cleared itself from the mask
+ if (delay_us)
+ continue;
+ }
+
+ p = paca_ptrs[cpu];
cpumask_clear_cpu(cpu, mask);
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index cbdf86228eaa..54c44aea338c 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume)
#ifdef CONFIG_ALTIVEC
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
sync
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 6d3189830dd3..068a268a8013 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -142,7 +142,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
_GLOBAL(swsusp_arch_resume)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 80a676da11cb..f08ca604a394 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -31,29 +31,27 @@
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
#ifdef CONFIG_PPC64
-/* Time in microseconds we delay before sleeping in the idle loop */
-static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ * up the kernel code."
+ *
+ * powerpc-utils stopped using it as of 1.3.8. At some point in the future this
+ * code should be removed.
+ */
static ssize_t store_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- ssize_t ret;
- long snooze;
-
- ret = sscanf(buf, "%ld", &snooze);
- if (ret != 1)
- return -EINVAL;
-
- per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+ pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n",
+ current->comm, current->pid);
return count;
}
@@ -61,9 +59,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+ pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n",
+ current->comm, current->pid);
+ return sprintf(buf, "100\n");
}
static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -71,16 +69,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
static int __init setup_smt_snooze_delay(char *str)
{
- unsigned int cpu;
- long snooze;
-
if (!cpu_has_feature(CPU_FTR_SMT))
return 1;
- snooze = simple_strtol(str, NULL, 10);
- for_each_possible_cpu(cpu)
- per_cpu(smt_snooze_delay, cpu) = snooze;
-
+ pr_warn("smt-snooze-delay command line option has no effect\n");
return 1;
}
__setup("smt-snooze-delay=", setup_smt_snooze_delay);
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index e2ab8a111b69..0b4694b8d248 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -13,13 +13,14 @@
*/
#include <linux/errno.h>
-#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
#include <asm/io.h>
#include <asm/reg.h>
@@ -39,9 +40,7 @@ static struct tau_temp
unsigned char grew;
} tau[NR_CPUS];
-struct timer_list tau_timer;
-
-#undef DEBUG
+static bool tau_int_enable;
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
* dynamic adjustment to minimize # of interrupts */
@@ -50,72 +49,49 @@ struct timer_list tau_timer;
#define step_size 2 /* step size when temp goes out of range */
#define window_expand 1 /* expand the window by this much */
/* configurable values for shrinking the window */
-#define shrink_timer 2*HZ /* period between shrinking the window */
+#define shrink_timer 2000 /* period between shrinking the window */
#define min_window 2 /* minimum window size, degrees C */
static void set_thresholds(unsigned long cpu)
{
-#ifdef CONFIG_TAU_INT
- /*
- * setup THRM1,
- * threshold, valid bit, enable interrupts, interrupt when below threshold
- */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
+ u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0;
- /* setup THRM2,
- * threshold, valid bit, enable interrupts, interrupt when above threshold
- */
- mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
-#else
- /* same thing but don't enable interrupts */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID);
- mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V);
-#endif
+ /* setup THRM1, threshold, valid bit, interrupt when below threshold */
+ mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID);
+
+ /* setup THRM2, threshold, valid bit, interrupt when above threshold */
+ mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie);
}
static void TAUupdate(int cpu)
{
- unsigned thrm;
-
-#ifdef DEBUG
- printk("TAUupdate ");
-#endif
+ u32 thrm;
+ u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
/* if both thresholds are crossed, the step_sizes cancel out
* and the window winds up getting expanded twice. */
- if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed low threshold */
- if (tau[cpu].low >= step_size){
- tau[cpu].low -= step_size;
- tau[cpu].high -= (step_size - window_expand);
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("low threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM1);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM1, 0);
+
+ if (tau[cpu].low >= step_size) {
+ tau[cpu].low -= step_size;
+ tau[cpu].high -= (step_size - window_expand);
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: low threshold crossed\n", __func__);
}
- if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed high threshold */
- if (tau[cpu].high <= 127-step_size){
- tau[cpu].low += (step_size - window_expand);
- tau[cpu].high += step_size;
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("high threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM2);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM2, 0);
+
+ if (tau[cpu].high <= 127 - step_size) {
+ tau[cpu].low += (step_size - window_expand);
+ tau[cpu].high += step_size;
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: high threshold crossed\n", __func__);
}
-
-#ifdef DEBUG
- printk("grew = %d\n", tau[cpu].grew);
-#endif
-
-#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
- set_thresholds(cpu);
-#endif
-
}
#ifdef CONFIG_TAU_INT
@@ -140,17 +116,16 @@ void TAUException(struct pt_regs * regs)
static void tau_timeout(void * info)
{
int cpu;
- unsigned long flags;
int size;
int shrink;
- /* disabling interrupts *should* be okay */
- local_irq_save(flags);
cpu = smp_processor_id();
-#ifndef CONFIG_TAU_INT
- TAUupdate(cpu);
-#endif
+ if (!tau_int_enable)
+ TAUupdate(cpu);
+
+ /* Stop thermal sensor comparisons and interrupts */
+ mtspr(SPRN_THRM3, 0);
size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
@@ -173,32 +148,26 @@ static void tau_timeout(void * info)
set_thresholds(cpu);
- /*
- * Do the enable every time, since otherwise a bunch of (relatively)
- * complex sleep code needs to be added. One mtspr every time
- * tau_timeout is called is probably not a big deal.
- *
- * Enable thermal sensor and set up sample interval timer
- * need 20 us to do the compare.. until a nice 'cpu_speed' function
- * call is implemented, just assume a 500 mhz clock. It doesn't really
- * matter if we take too long for a compare since it's all interrupt
- * driven anyway.
- *
- * use a extra long time.. (60 us @ 500 mhz)
+ /* Restart thermal sensor comparisons and interrupts.
+ * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
+ * recommends that "the maximum value be set in THRM3 under all
+ * conditions."
*/
- mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
-
- local_irq_restore(flags);
+ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
}
-static void tau_timeout_smp(struct timer_list *unused)
-{
+static struct workqueue_struct *tau_workq;
- /* schedule ourselves to be run again */
- mod_timer(&tau_timer, jiffies + shrink_timer) ;
+static void tau_work_func(struct work_struct *work)
+{
+ msleep(shrink_timer);
on_each_cpu(tau_timeout, NULL, 0);
+ /* schedule ourselves to be run again */
+ queue_work(tau_workq, work);
}
+DECLARE_WORK(tau_work, tau_work_func);
+
/*
* setup the TAU
*
@@ -231,21 +200,19 @@ static int __init TAU_init(void)
return 1;
}
+ tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
+ !strcmp(cur_cpu_spec->platform, "ppc750");
- /* first, set up the window shrinking timer */
- timer_setup(&tau_timer, tau_timeout_smp, 0);
- tau_timer.expires = jiffies + shrink_timer;
- add_timer(&tau_timer);
+ tau_workq = alloc_workqueue("tau", WQ_UNBOUND, 1, 0);
+ if (!tau_workq)
+ return -ENOMEM;
on_each_cpu(TAU_init_smp, NULL, 0);
- printk("Thermal assist unit ");
-#ifdef CONFIG_TAU_INT
- printk("using interrupts, ");
-#else
- printk("using timers, ");
-#endif
- printk("shrink_timer: %d jiffies\n", shrink_timer);
+ queue_work(tau_workq, &tau_work);
+
+ pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
+ tau_int_enable ? "interrupts" : "workqueue", shrink_timer);
tau_initialized = 1;
return 0;
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 7ea0ca044b65..d816e714f2f4 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -328,9 +328,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
/* Is this a known long jump tramp? */
for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
- if (!ftrace_tramps[i])
- break;
- else if (ftrace_tramps[i] == tramp)
+ if (ftrace_tramps[i] == tramp)
return 0;
/* Is this a known plt tramp? */
@@ -868,6 +866,17 @@ void arch_ftrace_update_code(int command)
extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+void ftrace_free_init_tramp(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++)
+ if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) {
+ ftrace_tramps[i] = 0;
+ return;
+ }
+}
+
int __init ftrace_dyn_arch_init(void)
{
int i;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 9432fc6af28a..ecfa460f66d1 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -513,8 +513,11 @@ out:
die("Unrecoverable nested System Reset", regs, SIGABRT);
#endif
/* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
+ if (!(regs->msr & MSR_RI)) {
+ /* For the reason explained in die_mce, nmi_exit before die */
+ nmi_exit();
die("Unrecoverable System Reset", regs, SIGABRT);
+ }
if (saved_hsrrs) {
mtspr(SPRN_HSRR0, hsrr0);
@@ -877,7 +880,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs)
{
unsigned int ra, rb, t, i, sel, instr, rc;
const void __user *addr;
- u8 vbuf[16], *vdst;
+ u8 vbuf[16] __aligned(16), *vdst;
unsigned long ea, msr, msr_mask;
bool swap;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 060a1acd7c6d..3ea360cad337 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -98,10 +98,11 @@ SECTIONS
ALIGN_FUNCTION();
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
- *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+ *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text);
#ifdef CONFIG_PPC64
*(.tramp.ftrace.text);
#endif
+ NOINSTR_TEXT
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
@@ -144,6 +145,20 @@ SECTIONS
}
. = ALIGN(8);
+ __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+ __start___uaccess_flush_fixup = .;
+ *(__uaccess_flush_fixup)
+ __stop___uaccess_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+ __start___entry_flush_fixup = .;
+ *(__entry_flush_fixup)
+ __stop___entry_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
__start___stf_exit_barrier_fixup = .;
*(__stf_exit_barrier_fixup)
@@ -196,6 +211,12 @@ SECTIONS
.init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
_sinittext = .;
INIT_TEXT
+
+ /*
+ *.init.text might be RO so we must ensure this section ends on
+ * a page boundary.
+ */
+ . = ALIGN(PAGE_SIZE);
_einittext = .;
#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
@@ -209,21 +230,9 @@ SECTIONS
EXIT_TEXT
}
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
- INIT_DATA
- }
-
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
- INIT_SETUP(16)
- }
-
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- INIT_CALLS
- }
+ . = ALIGN(PAGE_SIZE);
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
- CON_INITCALL
- }
+ INIT_DATA_SECTION(16)
. = ALIGN(8);
__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
@@ -251,9 +260,6 @@ SECTIONS
__stop___fw_ftr_fixup = .;
}
#endif
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
- INIT_RAM_FS
- }
PERCPU_SECTION(L1_CACHE_BYTES)
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index af3c15a1d41e..75b2a6c4db5a 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -132,6 +132,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
+ /*
+ * See wd_smp_clear_cpu_pending()
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
cpumask_andnot(&wd_smp_cpus_pending,
@@ -217,13 +221,44 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
+ } else {
+ /*
+ * The last CPU to clear pending should have reset the
+ * watchdog so we generally should not find it empty
+ * here if our CPU was clear. However it could happen
+ * due to a rare race with another CPU taking the
+ * last CPU out of the mask concurrently.
+ *
+ * We can't add a warning for it. But just in case
+ * there is a problem with the watchdog that is causing
+ * the mask to not be reset, try to kick it along here.
+ */
+ if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
+ goto none_pending;
}
return;
}
+
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+ /*
+ * Order the store to clear pending with the load(s) to check all
+ * words in the pending mask to check they are all empty. This orders
+ * with the same barrier on another CPU. This prevents two CPUs
+ * clearing the last 2 pending bits, but neither seeing the other's
+ * store when checking if the mask is empty, and missing an empty
+ * mask, which ends with a false positive.
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
unsigned long flags;
+none_pending:
+ /*
+ * Double check under lock because more than one CPU could see
+ * a clear mask with the lockless check after clearing their
+ * pending bits.
+ */
wd_smp_lock(&flags);
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
@@ -314,8 +349,12 @@ void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- u64 tb = get_tb();
+ u64 tb;
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return;
+
+ tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
per_cpu(wd_timer_tb, cpu) = tb;
wd_smp_clear_cpu_pending(cpu, tb);