aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile4
-rw-r--r--arch/powerpc/kernel/align.c4
-rw-r--r--arch/powerpc/kernel/asm-offsets.c5
-rw-r--r--arch/powerpc/kernel/btext.c16
-rw-r--r--arch/powerpc/kernel/cacheinfo.c5
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.c12
-rw-r--r--arch/powerpc/kernel/dbell.c3
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c24
-rw-r--r--arch/powerpc/kernel/eeh_cache.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c162
-rw-r--r--arch/powerpc/kernel/entry_32.S54
-rw-r--r--arch/powerpc/kernel/entry_64.S4
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c2
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S14
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S174
-rw-r--r--arch/powerpc/kernel/fadump.c26
-rw-r--r--arch/powerpc/kernel/fpu.S5
-rw-r--r--arch/powerpc/kernel/head_32.h15
-rw-r--r--arch/powerpc/kernel/head_40x.S17
-rw-r--r--arch/powerpc/kernel/head_44x.S26
-rw-r--r--arch/powerpc/kernel/head_64.S20
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S4
-rw-r--r--arch/powerpc/kernel/head_booke.h3
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S13
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c4
-rw-r--r--arch/powerpc/kernel/hw_breakpoint_constraints.c4
-rw-r--r--arch/powerpc/kernel/idle.c2
-rw-r--r--arch/powerpc/kernel/idle_6xx.S2
-rw-r--r--arch/powerpc/kernel/interrupt.c18
-rw-r--r--arch/powerpc/kernel/interrupt_64.S48
-rw-r--r--arch/powerpc/kernel/irq.c5
-rw-r--r--arch/powerpc/kernel/kgdb.c4
-rw-r--r--arch/powerpc/kernel/kprobes.c4
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S6
-rw-r--r--arch/powerpc/kernel/mce.c2
-rw-r--r--arch/powerpc/kernel/mce_power.c18
-rw-r--r--arch/powerpc/kernel/module.c11
-rw-r--r--arch/powerpc/kernel/module_32.c33
-rw-r--r--arch/powerpc/kernel/module_64.c42
-rw-r--r--arch/powerpc/kernel/nvram_64.c6
-rw-r--r--arch/powerpc/kernel/optprobes.c12
-rw-r--r--arch/powerpc/kernel/optprobes_head.S4
-rw-r--r--arch/powerpc/kernel/paca.c18
-rw-r--r--arch/powerpc/kernel/pci-common.c2
-rw-r--r--arch/powerpc/kernel/pci_32.c4
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c4
-rw-r--r--arch/powerpc/kernel/process.c58
-rw-r--r--arch/powerpc/kernel/prom.c33
-rw-r--r--arch/powerpc/kernel/prom_init.c14
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace.c3
-rw-r--r--arch/powerpc/kernel/rtas.c104
-rw-r--r--arch/powerpc/kernel/rtasd.c6
-rw-r--r--arch/powerpc/kernel/security.c4
-rw-r--r--arch/powerpc/kernel/setup-common.c2
-rw-r--r--arch/powerpc/kernel/setup.h2
-rw-r--r--arch/powerpc/kernel/setup_32.c4
-rw-r--r--arch/powerpc/kernel/setup_64.c120
-rw-r--r--arch/powerpc/kernel/signal_32.c14
-rw-r--r--arch/powerpc/kernel/smp.c47
-rw-r--r--arch/powerpc/kernel/swsusp_32.S2
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S2
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/powerpc/kernel/sysfs.c10
-rw-r--r--arch/powerpc/kernel/time.c90
-rw-r--r--arch/powerpc/kernel/tm.S15
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c107
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S118
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S15
-rw-r--r--arch/powerpc/kernel/traps.c8
-rw-r--r--arch/powerpc/kernel/udbg_16550.c10
-rw-r--r--arch/powerpc/kernel/vecemu.c2
-rw-r--r--arch/powerpc/kernel/vector.S10
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S16
-rw-r--r--arch/powerpc/kernel/watchdog.c223
74 files changed, 1210 insertions, 698 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 5fa68c2ef1f8..4d7829399570 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
@@ -47,7 +48,7 @@ obj-y := cputable.o syscalls.o \
udbg.o misc.o io.o misc_$(BITS).o \
of_platform.o prom_parse.o firmware.o \
hw_breakpoint_constraints.o interrupt.o \
- kdebugfs.o
+ kdebugfs.o stacktrace.o
obj-y += ptrace/
obj-$(CONFIG_PPC64) += setup_64.o \
paca.o nvram_64.o note.o
@@ -116,7 +117,6 @@ obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index bf96b954a4eb..3e37ece06739 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = {
* so we don't need the address swizzling.
*/
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
- struct ppc_inst ppc_instr)
+ ppc_inst_t ppc_instr)
{
union {
u64 ll;
@@ -300,7 +300,7 @@ Efault_write:
int fix_alignment(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct instruction_op op;
int r, type;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index cc05522f50bf..7582f3e3a330 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -54,7 +54,7 @@
#endif
#ifdef CONFIG_PPC32
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
#include "head_booke.h"
#endif
#endif
@@ -139,6 +139,7 @@ int main(void)
OFFSET(THR11, thread_struct, r11);
OFFSET(THLR, thread_struct, lr);
OFFSET(THCTR, thread_struct, ctr);
+ OFFSET(THSR0, thread_struct, sr0);
#endif
#ifdef CONFIG_SPE
OFFSET(THREAD_EVR0, thread_struct, evr[0]);
@@ -218,10 +219,12 @@ int main(void)
OFFSET(PACA_EXGEN, paca_struct, exgen);
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_64S_HASH_MMU
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
+#endif
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 803c2a45b22a..9d9d56b574cc 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -161,7 +161,7 @@ void btext_map(void)
boot_text_mapped = 1;
}
-static int btext_initialize(struct device_node *np)
+static int __init btext_initialize(struct device_node *np)
{
unsigned int width, height, depth, pitch;
unsigned long address = 0;
@@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout)
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
- if (rc == 0)
+ if (rc == 0) {
+ of_node_put(np);
break;
+ }
}
return rc;
}
@@ -290,7 +292,7 @@ void btext_update_display(unsigned long phys, int width, int height,
}
EXPORT_SYMBOL(btext_update_display);
-void btext_clearscreen(void)
+void __init btext_clearscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -308,7 +310,7 @@ void btext_clearscreen(void)
rmci_maybe_off();
}
-void btext_flushscreen(void)
+void __init btext_flushscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -327,7 +329,7 @@ void btext_flushscreen(void)
__asm__ __volatile__ ("sync" ::: "memory");
}
-void btext_flushline(void)
+void __init btext_flushline(void)
{
unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -542,7 +544,7 @@ void btext_drawstring(const char *c)
btext_drawchar(*c++);
}
-void btext_drawtext(const char *c, unsigned int len)
+void __init btext_drawtext(const char *c, unsigned int len)
{
if (!boot_text_mapped)
return;
@@ -550,7 +552,7 @@ void btext_drawtext(const char *c, unsigned int len)
btext_drawchar(*c++);
}
-void btext_drawhex(unsigned long v)
+void __init btext_drawhex(unsigned long v)
{
if (!boot_text_mapped)
return;
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index cf1be75b7833..00b0992be3e7 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -710,7 +710,7 @@ static struct kobj_attribute cache_shared_cpu_list_attr =
__ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
/* Attributes which should always be created -- the kobject/sysfs core
- * does this automatically via kobj_type->default_attrs. This is the
+ * does this automatically via kobj_type->default_groups. This is the
* minimum data required to uniquely identify a cache.
*/
static struct attribute *cache_index_default_attrs[] = {
@@ -720,6 +720,7 @@ static struct attribute *cache_index_default_attrs[] = {
&cache_shared_cpu_list_attr.attr,
NULL,
};
+ATTRIBUTE_GROUPS(cache_index_default);
/* Attributes which should be created if the cache device node has the
* right properties -- see cacheinfo_create_index_opt_attrs
@@ -738,7 +739,7 @@ static const struct sysfs_ops cache_index_ops = {
static struct kobj_type cache_index_type = {
.release = cache_index_release,
.sysfs_ops = &cache_index_ops,
- .default_attrs = cache_index_default_attrs,
+ .default_groups = cache_index_default_groups,
};
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
index 3cca88ee96d7..3dc61e203f37 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
static void init_PMU(void)
{
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
@@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
{
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
- mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
/*
@@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
@@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
}
@@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
return;
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 5545c9cd17c1..f55c6fb34a3a 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
ppc_msgsync();
- may_hard_irq_enable();
+ if (should_hard_irq_enable())
+ do_hard_irq_enable();
kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index ba527fb52993..7d1b2c4a4891 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
mtspr(SPRN_PCR, system_registers.pcr);
}
@@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
}
mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_LPES0; /* HV external interrupts */
@@ -271,6 +273,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
{
u64 lpcr;
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~LPCR_ISL;
@@ -290,6 +295,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
lpcr = mfspr(SPRN_LPCR);
lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
@@ -303,15 +311,15 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
{
-#ifdef CONFIG_PPC_RADIX_MMU
+ if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU))
+ return 0;
+
+ cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO;
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
- cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
-#endif
- return 0;
}
static int __init feat_enable_dscr(struct dt_cpu_feature *f)
@@ -336,7 +344,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f)
return 1;
}
-static void hfscr_pmu_enable(void)
+static void __init hfscr_pmu_enable(void)
{
u64 hfscr = mfspr(SPRN_HFSCR);
hfscr |= PPC_BIT(60);
@@ -351,7 +359,7 @@ static void init_pmu_power8(void)
}
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
mtspr(SPRN_MMCRS, 0);
@@ -390,7 +398,7 @@ static void init_pmu_power9(void)
mtspr(SPRN_MMCRC, 0);
mtspr(SPRN_MMCRA, 0);
- mtspr(SPRN_MMCR0, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
}
@@ -426,7 +434,7 @@ static void init_pmu_power10(void)
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
- mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
}
static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 9bdaaf7fddc9..2f9dbf8ad2ee 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -280,7 +280,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
}
DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
-void eeh_cache_debugfs_init(void)
+void __init eeh_cache_debugfs_init(void)
{
debugfs_create_file_unsafe("eeh_address_cache", 0400,
arch_debugfs_dir, NULL,
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 350dab18e137..422f80b5b27b 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -905,18 +905,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
#endif /* CONFIG_STACKTRACE */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
- result = PCI_ERS_RESULT_DISCONNECT;
- }
- eeh_for_each_pe(pe, tmp_pe)
- eeh_pe_for_each_dev(tmp_pe, edev, tmp)
- edev->mode &= ~EEH_DEV_NO_HANDLER;
+ goto recover_failed;
+ }
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
@@ -928,39 +929,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
* the error. Override the result if necessary to have partially
* hotplug for this case.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
- pe->freeze_count, eeh_max_freezes);
- pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_set_channel_state(pe, pci_channel_io_frozen);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(IO frozen)", pe,
- eeh_report_error, &result);
- if ((pe->type & EEH_PE_PHB) &&
- result != PCI_ERS_RESULT_NONE &&
- result != PCI_ERS_RESULT_NEED_RESET)
- result = PCI_ERS_RESULT_NEED_RESET;
- }
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if (result == PCI_ERS_RESULT_DISCONNECT)
+ goto recover_failed;
+
+ /*
+ * Error logged on a PHB are always fences which need a full
+ * PHB reset to clear so force that to happen.
+ */
+ if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE)
+ result = PCI_ERS_RESULT_NEED_RESET;
/* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
- pr_warn("EEH: Permanent failure\n");
- result = PCI_ERS_RESULT_DISCONNECT;
- }
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warn("EEH: Permanent failure\n");
+ goto recover_failed;
}
/* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers
* have been informed.
*/
- if (result != PCI_ERS_RESULT_DISCONNECT) {
- pr_info("EEH: Collect temporary log\n");
- eeh_slot_error_detail(pe, EEH_LOG_TEMP);
- }
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
@@ -970,9 +970,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset with hotplug activity\n");
rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
- pr_warn("%s: Unable to reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
+ pr_warn("%s: Unable to reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
}
@@ -980,10 +979,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
@@ -991,15 +990,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
eeh_report_mmio_enabled, &result);
}
}
-
- /* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (rc < 0)
+ goto recover_failed;
- if (rc < 0) {
- result = PCI_ERS_RESULT_DISCONNECT;
- } else if (rc) {
+ if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
/*
@@ -1017,16 +1014,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Reset without hotplug activity\n");
rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
- pr_warn("%s: Cannot reset, err=%d\n",
- __func__, rc);
- result = PCI_ERS_RESULT_DISCONNECT;
- } else {
- result = PCI_ERS_RESULT_NONE;
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("slot_reset", pe, eeh_report_reset,
- &result);
+ pr_warn("%s: Cannot reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
+
+ result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
}
if ((result == PCI_ERS_RESULT_RECOVERED) ||
@@ -1054,45 +1050,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
}
pr_info("EEH: Recovery successful.\n");
- } else {
- /*
- * About 90% of all real-life EEH failures in the field
- * are due to poorly seated PCI cards. Only 10% or so are
- * due to actual, failed cards.
- */
- pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
- "Please try reseating or replacing it\n",
- pe->phb->global_number, pe->addr);
+ goto out;
+ }
- eeh_slot_error_detail(pe, EEH_LOG_PERM);
+recover_failed:
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
- /* Notify all devices that they're about to go down. */
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
- /* Mark the PE to be removed permanently */
- eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+ /* Notify all devices that they're about to go down. */
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
- /*
- * Shut down the device drivers for good. We mark
- * all removed devices correctly to avoid access
- * the their PCI config any more.
- */
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ /* Mark the PE to be removed permanently */
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- /* The passed PE should no longer be used */
- return;
- }
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
out:
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 61fdd53cdd9a..7748c278d13c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -73,13 +73,39 @@ prepare_transfer_to_handler:
_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+ .globl __kuep_lock
+__kuep_lock:
+ lwz r9, THREAD+THSR0(r2)
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+
+__kuep_unlock:
+ lwz r9, THREAD+THSR0(r2)
+ rlwinm r9,r9,0,~SR_NX
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+
+.macro kuep_lock
+ bl __kuep_lock
+.endm
+.macro kuep_unlock
+ bl __kuep_unlock
+.endm
+#else
+.macro kuep_lock
+.endm
+.macro kuep_unlock
+.endm
+#endif
+
.globl transfer_to_syscall
transfer_to_syscall:
stw r11, GPR1(r1)
stw r11, 0(r1)
mflr r12
stw r12, _LINK(r1)
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#endif
lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -90,10 +116,10 @@ transfer_to_syscall:
stw r12,8(r1)
stw r2,_TRAP(r1)
SAVE_GPR(0, r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
addi r2,r10,-THREAD
SAVE_NVGPRS(r1)
+ kuep_lock
/* Calling convention has r9 = orig r0, r10 = regs */
addi r10,r1,STACK_FRAME_OVERHEAD
@@ -110,6 +136,7 @@ ret_from_syscall:
cmplwi cr0,r5,0
bne- 2f
#endif /* CONFIG_PPC_47x */
+ kuep_unlock
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
@@ -139,7 +166,7 @@ syscall_exit_finish:
mtxer r5
lwz r0,GPR0(r1)
lwz r3,GPR3(r1)
- REST_8GPRS(4,r1)
+ REST_GPRS(4, 11, r1)
lwz r12,GPR12(r1)
b 1b
@@ -232,9 +259,9 @@ fast_exception_return:
beq 3f /* if not, we've got problems */
#endif
-2: REST_4GPRS(3, r11)
+2: REST_GPRS(3, 6, r11)
lwz r10,_CCR(r11)
- REST_2GPRS(1, r11)
+ REST_GPRS(1, 2, r11)
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
@@ -273,6 +300,7 @@ interrupt_return:
beq .Lkernel_interrupt_return
bl interrupt_exit_user_prepare
cmpwi r3,0
+ kuep_unlock
bne- .Lrestore_nvgprs
.Lfast_user_interrupt_return:
@@ -298,16 +326,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
* the reliable stack unwinder later on. Clear it.
*/
stw r0,8(r1)
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtcr r3
mtlr r4
mtctr r5
mtspr SPRN_XER,r6
- REST_4GPRS(2, r1)
- REST_GPR(6, r1)
+ REST_GPRS(2, 6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
rfi
@@ -341,8 +367,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
lwz r6,_CCR(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtlr r3
mtctr r4
@@ -354,7 +379,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
*/
stw r0,8(r1)
- REST_4GPRS(2, r1)
+ REST_GPRS(2, 5, r1)
bne- cr1,1f /* emulate stack store */
mtcr r6
@@ -430,8 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)
bne interrupt_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
- REST_4GPRS(3, r1); \
- REST_2GPRS(7, r1); \
+ REST_GPRS(3, 8, r1); \
lwz r10,_XER(r1); \
lwz r11,_CTR(r1); \
mtspr SPRN_XER,r10; \
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 70cff7b49e17..9581906b5ee9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -180,7 +180,7 @@ _GLOBAL(_switch)
#endif
ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
b 2f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
@@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
slbmte r7,r0
isync
2:
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
clrrdi r7, r8, THREAD_SHIFT /* base of new stack */
/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 93b0f3ec8fb0..d4b8aff20815 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
+ ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 711c66b76df1..67dc4e3179a0 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
stdcx. r0,0,r1 /* to clear the reservation */
- REST_4GPRS(2, r1)
- REST_4GPRS(6, r1)
+ REST_GPRS(2, 9, r1)
ld r10,_CTR(r1)
ld r11,_XER(r1)
@@ -375,9 +374,7 @@ ret_from_mc_except:
exc_##n##_common: \
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
@@ -1061,9 +1058,7 @@ bad_stack_book3e:
std r11,_ESR(r1)
std r0,GPR0(r1); /* save r0 in stackframe */ \
std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \
ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
@@ -1077,8 +1072,7 @@ bad_stack_book3e:
std r10,_LINK(r1)
std r11,_CTR(r1)
std r12,_XER(r1)
- SAVE_10GPRS(14,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPRS(14, 31, r1)
lhz r12,PACA_TRAP_SAVE(r13)
std r12,_TRAP(r1)
addi r11,r1,INT_FRAME_SIZE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eaf1f72131a1..55caeee37c08 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -48,7 +48,7 @@
.balign IFETCH_ALIGN_BYTES; \
.global name; \
_ASM_NOKPROBE_SYMBOL(name); \
- DEFINE_FIXED_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name, text); \
name:
#define TRAMP_REAL_BEGIN(name) \
@@ -76,31 +76,18 @@ name:
ld reg,PACAKBASE(r13); /* get high part of &label */ \
ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
-#define __LOAD_HANDLER(reg, label) \
+#define __LOAD_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l
+ ori reg,reg,(ABS_ADDR(label, section))@l
/*
* Branches from unrelocated code (e.g., interrupts) to labels outside
* head-y require >64K offsets.
*/
-#define __LOAD_FAR_HANDLER(reg, label) \
+#define __LOAD_FAR_HANDLER(reg, label, section) \
ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(label))@l; \
- addis reg,reg,(ABS_ADDR(label))@h
-
-/*
- * Branch to label using its 0xC000 address. This results in instruction
- * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
- * on using mtmsr rather than rfid.
- *
- * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than
- * load KBASE for a slight optimisation.
- */
-#define BRANCH_TO_C000(reg, label) \
- __LOAD_FAR_HANDLER(reg, label); \
- mtctr reg; \
- bctr
+ ori reg,reg,(ABS_ADDR(label, section))@l; \
+ addis reg,reg,(ABS_ADDR(label, section))@h
/*
* Interrupt code generation macros
@@ -111,9 +98,10 @@ name:
#define IAREA .L_IAREA_\name\() /* PACA save area */
#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */
+#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */
#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
-#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */
#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
@@ -151,15 +139,18 @@ do_define_int n
.ifndef IISIDE
IISIDE=0
.endif
+ .ifndef ICFAR
+ ICFAR=1
+ .endif
+ .ifndef ICFAR_IF_HVMODE
+ ICFAR_IF_HVMODE=0
+ .endif
.ifndef IDAR
IDAR=0
.endif
.ifndef IDSISR
IDSISR=0
.endif
- .ifndef ISET_RI
- ISET_RI=1
- .endif
.ifndef IBRANCH_TO_COMMON
IBRANCH_TO_COMMON=1
.endif
@@ -291,9 +282,21 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
std r10,IAREA+EX_R10(r13) /* save r10 - r12 */
+ .if ICFAR
BEGIN_FTR_SECTION
mfspr r10,SPRN_CFAR
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(69)
+ mfspr r10,SPRN_CFAR
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(69)
+ li r10,0
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .endif
.if \ool
.if !\virt
b tramp_real_\name
@@ -309,9 +312,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
BEGIN_FTR_SECTION
std r9,IAREA+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ .if ICFAR || ICFAR_IF_HVMODE
BEGIN_FTR_SECTION
std r10,IAREA+EX_CFAR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .endif
INTERRUPT_TO_KERNEL
mfctr r10
std r10,IAREA+EX_CTR(r13)
@@ -376,7 +381,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
* This switches to virtual mode and sets MSR[RI].
*/
.macro __GEN_COMMON_ENTRY name
-DEFINE_FIXED_SYMBOL(\name\()_common_real)
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name kvm_interrupt
@@ -399,7 +404,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
.endif
.balign IFETCH_ALIGN_BYTES
-DEFINE_FIXED_SYMBOL(\name\()_common_virt)
+DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)
\name\()_common_virt:
.if IKVM_VIRT
KVMTEST \name kvm_interrupt
@@ -413,7 +418,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
* want to run in real mode.
*/
.macro __GEN_REALMODE_COMMON_ENTRY name
-DEFINE_FIXED_SYMBOL(\name\()_common_real)
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name kvm_interrupt
@@ -512,11 +517,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
stb r10,PACASRR_VALID(r13)
.endif
- .if ISET_RI
- li r10,MSR_RI
- mtmsrd r10,1 /* Set MSR_RI */
- .endif
-
.if ISTACK
.if IKUAP
kuap_save_amr_and_lock r9, r10, cr1, cr0
@@ -568,14 +568,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
.endif
BEGIN_FTR_SECTION
+ .if ICFAR || ICFAR_IF_HVMODE
ld r10,IAREA+EX_CFAR(r13)
+ .else
+ li r10,0
+ .endif
std r10,ORIG_GPR3(r1)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
std r2,GPR2(r1) /* save r2 in stackframe */
- SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
- SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */
mflr r9 /* Get LR, later save to stack */
ld r2,PACATOC(r13) /* get kernel TOC into r2 */
std r9,_LINK(r1)
@@ -693,8 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlr r9
ld r9,_CCR(r1)
mtcr r9
- REST_8GPRS(2, r1)
- REST_4GPRS(10, r1)
+ REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
ld r1,GPR1(r1)
@@ -850,12 +852,12 @@ SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
#ifdef CONFIG_RELOCATABLE
TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
- __LOAD_HANDLER(r10, system_call_vectored_common)
+ __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines)
mtctr r10
bctr
TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
- __LOAD_HANDLER(r10, system_call_vectored_sigill)
+ __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines)
mtctr r10
bctr
#endif
@@ -902,11 +904,6 @@ INT_DEFINE_BEGIN(system_reset)
IVEC=0x100
IAREA=PACA_EXNMI
IVIRT=0 /* no virt entry point */
- /*
- * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
- * being used, so a nested NMI exception would corrupt it.
- */
- ISET_RI=0
ISTACK=0
IKVM_REAL=1
INT_DEFINE_END(system_reset)
@@ -964,7 +961,9 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
/* We are waking up from idle, so may clobber any volatile register */
cmpwi cr1,r5,2
bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
- BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss))
+ __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines)
+ mtctr r12
+ bctr
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -979,16 +978,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi)
EXC_COMMON_BEGIN(system_reset_common)
__GEN_COMMON_ENTRY system_reset
/*
- * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
- * to recover, but nested NMI will notice in_nmi and not recover
- * because of the use of the NMI stack. in_nmi reentrancy is tested in
- * system_reset_exception.
+ * Increment paca->in_nmi. When the interrupt entry wrapper later
+ * enable MSR_RI, then SLB or MCE will be able to recover, but a nested
+ * NMI will notice in_nmi and not recover because of the use of the NMI
+ * stack. in_nmi reentrancy is tested in system_reset_exception.
*/
lhz r10,PACA_IN_NMI(r13)
addi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- li r10,MSR_RI
- mtmsrd r10,1
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
@@ -1062,12 +1059,6 @@ INT_DEFINE_BEGIN(machine_check_early)
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
IREALMODE_COMMON=1
- /*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
- */
- ISET_RI=0
ISTACK=0
IDAR=1
IDSISR=1
@@ -1078,7 +1069,6 @@ INT_DEFINE_BEGIN(machine_check)
IVEC=0x200
IAREA=PACA_EXMC
IVIRT=0 /* no virt entry point */
- ISET_RI=0
IDAR=1
IDSISR=1
IKVM_REAL=1
@@ -1148,9 +1138,6 @@ EXC_COMMON_BEGIN(machine_check_early_common)
BEGIN_FTR_SECTION
bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- li r10,MSR_RI
- mtmsrd r10,1
-
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_early
std r3,RESULT(r1) /* Save result */
@@ -1238,10 +1225,6 @@ EXC_COMMON_BEGIN(machine_check_common)
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
GEN_COMMON machine_check
-
- /* Enable MSR_RI when finished with PACA_EXMC */
- li r10,MSR_RI
- mtmsrd r10,1
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception_async
b interrupt_return_srr
@@ -1369,11 +1352,15 @@ EXC_COMMON_BEGIN(data_access_common)
addi r3,r1,STACK_FRAME_OVERHEAD
andis. r0,r4,DSISR_DABRMATCH@h
bne- 1f
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
1: bl do_break
@@ -1416,6 +1403,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
EXC_COMMON_BEGIN(data_access_slb_common)
GEN_COMMON data_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1428,9 +1416,12 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1462,11 +1453,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80)
EXC_COMMON_BEGIN(instruction_access_common)
GEN_COMMON instruction_access
addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
bl do_hash_fault
MMU_FTR_SECTION_ELSE
bl do_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl do_page_fault
+#endif
b interrupt_return_srr
@@ -1496,6 +1491,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
EXC_COMMON_BEGIN(instruction_access_slb_common)
GEN_COMMON instruction_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -1508,9 +1504,12 @@ MMU_FTR_SECTION_ELSE
/* Radix case, access is outside page table range */
li r3,-EFAULT
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ li r3,-EFAULT
+#endif
std r3,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_bad_slb_fault
+ bl do_bad_segment_interrupt
b interrupt_return_srr
@@ -1536,6 +1535,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
*
* If soft masked, the masked handler will note the pending interrupt for
* replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
*/
INT_DEFINE_BEGIN(hardware_interrupt)
IVEC=0x500
@@ -1543,6 +1548,10 @@ INT_DEFINE_BEGIN(hardware_interrupt)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+ ICFAR=0
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR_IF_HVMODE=1
+#endif
INT_DEFINE_END(hardware_interrupt)
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1764,6 +1773,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
* If PPC_WATCHDOG is configured, the soft masked handler will actually set
* things back up to run soft_nmi_interrupt as a regular interrupt handler
* on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
*/
INT_DEFINE_BEGIN(decrementer)
IVEC=0x900
@@ -1771,6 +1784,7 @@ INT_DEFINE_BEGIN(decrementer)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(decrementer)
EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
@@ -1846,6 +1860,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)
* If soft masked, the masked handler will note the pending interrupt for
* replay, leaving MSR[EE] enabled in the interrupted context because the
* doorbells are edge triggered.
+ *
+ * CFAR is not required, similarly to hardware_interrupt.
*/
INT_DEFINE_BEGIN(doorbell_super)
IVEC=0xa00
@@ -1853,6 +1869,7 @@ INT_DEFINE_BEGIN(doorbell_super)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(doorbell_super)
EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
@@ -1904,6 +1921,7 @@ INT_DEFINE_BEGIN(system_call)
IVEC=0xc00
IKVM_REAL=1
IKVM_VIRT=1
+ ICFAR=0
INT_DEFINE_END(system_call)
.macro SYSTEM_CALL virt
@@ -1942,12 +1960,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
HMT_MEDIUM
.if ! \virt
- __LOAD_HANDLER(r10, system_call_common_real)
+ __LOAD_HANDLER(r10, system_call_common_real, real_vectors)
mtctr r10
bctr
.else
#ifdef CONFIG_RELOCATABLE
- __LOAD_HANDLER(r10, system_call_common)
+ __LOAD_HANDLER(r10, system_call_common, virt_vectors)
mtctr r10
bctr
#else
@@ -2001,7 +2019,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
* outside the head section.
*/
- __LOAD_FAR_HANDLER(r10, kvmppc_hcall)
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)
mtctr r10
bctr
#else
@@ -2202,6 +2220,11 @@ EXC_COMMON_BEGIN(hmi_exception_common)
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
* This is an asynchronous interrupt in response to a msgsnd doorbell.
* Similar to the 0xa00 doorbell but for host rather than guest.
+ *
+ * CFAR is not required (similar to doorbell_interrupt), unless KVM HV
+ * is enabled, in which case it may be a guest exit. Most PowerNV kernels
+ * include KVM support so it would be nice if this could be dynamically
+ * patched out if KVM was not currently running any guests.
*/
INT_DEFINE_BEGIN(h_doorbell)
IVEC=0xe80
@@ -2209,6 +2232,9 @@ INT_DEFINE_BEGIN(h_doorbell)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
INT_DEFINE_END(h_doorbell)
EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
@@ -2232,6 +2258,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
* This is an asynchronous interrupt in response to an "external exception".
* Similar to 0x500 but for host only.
+ *
+ * Like h_doorbell, CFAR is only required for KVM HV because this can be
+ * a guest exit.
*/
INT_DEFINE_BEGIN(h_virt_irq)
IVEC=0xea0
@@ -2239,6 +2268,9 @@ INT_DEFINE_BEGIN(h_virt_irq)
IMASK=IRQS_DISABLED
IKVM_REAL=1
IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
INT_DEFINE_END(h_virt_irq)
EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
@@ -2275,6 +2307,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20)
*
* If soft masked, the masked handler will note the pending interrupt for
* replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not used by perf interrupts so not required.
*/
INT_DEFINE_BEGIN(performance_monitor)
IVEC=0xf00
@@ -2282,6 +2316,7 @@ INT_DEFINE_BEGIN(performance_monitor)
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
#endif
+ ICFAR=0
INT_DEFINE_END(performance_monitor)
EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
@@ -2706,6 +2741,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
INT_DEFINE_BEGIN(soft_nmi)
IVEC=0x900
ISTACK=0
+ ICFAR=0
INT_DEFINE_END(soft_nmi)
/*
@@ -3025,7 +3061,7 @@ USE_FIXED_SECTION(virt_trampolines)
.align 7
.globl __end_interrupts
__end_interrupts:
-DEFINE_FIXED_SYMBOL(__end_interrupts)
+DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines)
CLOSE_FIXED_SECTION(real_vectors);
CLOSE_FIXED_SECTION(real_trampolines);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index b7ceb041743c..d03e488cfe9c 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -251,7 +251,7 @@ bool is_fadump_reserved_mem_contiguous(void)
}
/* Print firmware assisted dump configurations for debugging purpose. */
-static void fadump_show_config(void)
+static void __init fadump_show_config(void)
{
int i;
@@ -353,7 +353,7 @@ static __init u64 fadump_calculate_reserve_size(void)
* Calculate the total memory size required to be reserved for
* firmware-assisted dump registration.
*/
-static unsigned long get_fadump_area_size(void)
+static unsigned long __init get_fadump_area_size(void)
{
unsigned long size = 0;
@@ -462,7 +462,7 @@ static int __init fadump_get_boot_mem_regions(void)
* with the given memory range.
* False, otherwise.
*/
-static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
{
bool ret = false;
int i;
@@ -737,7 +737,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fw_dump.ops->fadump_trigger(fdh, str);
}
-u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -752,7 +752,7 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
return buf;
}
-void fadump_update_elfcore_header(char *bufp)
+void __init fadump_update_elfcore_header(char *bufp)
{
struct elf_phdr *phdr;
@@ -770,7 +770,7 @@ void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_alloc_buffer(unsigned long size)
+static void *__init fadump_alloc_buffer(unsigned long size)
{
unsigned long count, i;
struct page *page;
@@ -792,7 +792,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
@@ -1447,7 +1447,7 @@ static ssize_t release_mem_store(struct kobject *kobj,
}
/* Release the reserved memory and disable the FADump */
-static void unregister_fadump(void)
+static void __init unregister_fadump(void)
{
fadump_cleanup();
fadump_release_memory(fw_dump.reserve_dump_area_start,
@@ -1547,7 +1547,7 @@ ATTRIBUTE_GROUPS(fadump);
DEFINE_SHOW_ATTRIBUTE(fadump_region);
-static void fadump_init_files(void)
+static void __init fadump_init_files(void)
{
int rc = 0;
@@ -1641,6 +1641,14 @@ int __init setup_fadump(void)
else if (fw_dump.reserve_dump_area_size)
fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+ /*
+ * In case of panic, fadump is triggered via ppc_panic_event()
+ * panic notifier. Setting crash_kexec_post_notifiers to 'true'
+ * lets panic() function take crash friendly path before panic
+ * notifiers are invoked.
+ */
+ crash_kexec_post_notifiers = true;
+
return 1;
}
subsys_initcall(setup_fadump);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index ba4afe3b5a9c..f71f2bbd4de6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state)
*/
_GLOBAL(load_up_fpu)
mfmsr r5
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_FP|MSR_RI
+#else
ori r5,r5,MSR_FP
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
oris r5,r5,MSR_VSX@h
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 6b1ec9e3541b..c3286260a7d1 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
stw r10,8(r1)
li r10, \trapno
stw r10,_TRAP(r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
SAVE_NVGPRS(r1)
stw r2,GPR2(r1)
stw r12,_NIP(r1)
@@ -136,6 +135,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
andi. r12,r9,MSR_PR
bne 777f
bl prepare_transfer_to_handler
+#ifdef CONFIG_PPC_KUEP
+ b 778f
+777:
+ bl __kuep_lock
+778:
+#endif
777:
#endif
.endm
@@ -202,11 +207,11 @@ vmap_stack_overflow:
mfspr r1, SPRN_SPRG_THREAD
lwz r1, TASK_CPU - THREAD(r1)
slwi r1, r1, 3
- addis r1, r1, emergency_ctx@ha
+ addis r1, r1, emergency_ctx-PAGE_OFFSET@ha
#else
- lis r1, emergency_ctx@ha
+ lis r1, emergency_ctx-PAGE_OFFSET@ha
#endif
- lwz r1, emergency_ctx@l(r1)
+ lwz r1, emergency_ctx-PAGE_OFFSET@l(r1)
addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
EXCEPTION_PROLOG_2 0 vmap_stack_overflow
prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 7d72ee5ab387..b6c6d1de5fd5 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -27,6 +27,7 @@
#include <linux/init.h>
#include <linux/pgtable.h>
+#include <linux/sizes.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -297,6 +298,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
@@ -377,6 +382,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
3:
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ rlwinm. r9, r9, 0, 0xff
+ beq 5f /* Kuap fault */
+#endif
4:
tophys(r11, r11)
rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
@@ -650,7 +659,7 @@ start_here:
b . /* prevent prefetch past rfi */
/* Set up the initial MMU state so we can do the first level of
- * kernel initialization. This maps the first 16 MBytes of memory 1:1
+ * kernel initialization. This maps the first 32 MBytes of memory 1:1
* virtual to physical and more importantly sets the cache mode.
*/
initial_mmu:
@@ -687,6 +696,12 @@ initial_mmu:
tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+ li r0,62 /* TLB slot 62 */
+ addis r4,r4,SZ_16M@h
+ addis r3,r3,SZ_16M@h
+ tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
+ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
+
isync
/* Establish the exception vector base
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 02d2928d1e01..b73a56466903 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -334,6 +334,10 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -444,6 +448,10 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -532,10 +540,7 @@ finish_tlb_load_44x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
-#ifdef CONFIG_PPC_KUEP
-0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
- patch_site 0b, patch__tlb_44x_kuep
-#endif
+ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
@@ -575,6 +580,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG3
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Mask of required permission bits. Note that while we
@@ -672,6 +681,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Make up the required permissions */
@@ -747,10 +760,7 @@ finish_tlb_load_47x:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
-#ifdef CONFIG_PPC_KUEP
-0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
- patch_site 0b, patch__tlb_47x_kuep
-#endif
+ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
1: tlbwe r11,r13,2
/* Done...restore registers and get out of here.
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f17ae2083733..5c5181e8d5f1 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -126,7 +126,7 @@ __secondary_hold_acknowledge:
. = 0x5c
.globl __run_at_load
__run_at_load:
-DEFINE_FIXED_SYMBOL(__run_at_load)
+DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)
.long RUN_AT_LOAD_DEFAULT
#endif
@@ -156,7 +156,7 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0)
+ std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0)
sync
li r26,0
@@ -164,7 +164,7 @@ __secondary_hold:
tovirt(r26,r26)
#endif
/* All secondary cpus wait here until told to start. */
-100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26)
+100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26)
cmpdi 0,r12,0
beq 100b
@@ -649,15 +649,15 @@ __after_prom_start:
3:
#endif
/* # bytes of memory to copy */
- lis r5,(ABS_ADDR(copy_to_here))@ha
- addi r5,r5,(ABS_ADDR(copy_to_here))@l
+ lis r5,(ABS_ADDR(copy_to_here, text))@ha
+ addi r5,r5,(ABS_ADDR(copy_to_here, text))@l
bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
/* Jump to the copy of this code that we just made */
- addis r8,r3,(ABS_ADDR(4f))@ha
- addi r12,r8,(ABS_ADDR(4f))@l
+ addis r8,r3,(ABS_ADDR(4f, text))@ha
+ addi r12,r8,(ABS_ADDR(4f, text))@l
mtctr r12
bctr
@@ -669,8 +669,8 @@ p_end: .8byte _end - copy_to_here
* Now copy the rest of the kernel up to _end, add
* _end - copy_to_here to the copy limit and run again.
*/
- addis r8,r26,(ABS_ADDR(p_end))@ha
- ld r8,(ABS_ADDR(p_end))@l(r8)
+ addis r8,r26,(ABS_ADDR(p_end, text))@ha
+ ld r8,(ABS_ADDR(p_end, text))@l(r8)
add r5,r5,r8
5: bl copy_and_flush /* copy the rest */
@@ -904,7 +904,7 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .8byte __toc_start + 0x8000 - 0b
+p_toc: .8byte .TOC. - 0b
/*
* This is where the main kernel code starts.
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 68e5c0a7e99d..fa84744d6b24 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -931,7 +931,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
_GLOBAL(load_segment_registers)
li r0, NUM_USER_SEGMENTS /* load up user segment register values */
mtctr r0 /* for context 0 */
+#ifdef CONFIG_PPC_KUEP
+ lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */
+#else
li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#endif
li r4, 0
3: mtsrin r3, r4
addi r3, r3, 0x111 /* increment VSID */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index ef8d1b1c234e..bb6d5d0fc4ac 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION
stw r10, 8(r1)
li r10, \trapno
stw r10,_TRAP(r1)
- SAVE_4GPRS(3, r1)
- SAVE_2GPRS(7, r1)
+ SAVE_GPRS(3, 8, r1)
SAVE_NVGPRS(r1)
stw r2,GPR2(r1)
stw r12,_NIP(r1)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 0a9a0f301474..ac2b4dcf5fd3 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -462,6 +462,12 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
4:
/* Mask of required permission bits. Note that while we
* do copy ESR:ST to _PAGE_RW position as trying to write
@@ -571,6 +577,12 @@ END_BTB_FLUSH_SECTION
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
/* Make up the required permissions for user code */
#ifdef CONFIG_PTE_64BIT
li r13,_PAGE_PRESENT | _PAGE_BAP_UX
@@ -777,6 +789,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
slwi r10, r12, 1
or r10, r10, r12
+ rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */
iseleq r12, r12, r10
rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
mtspr SPRN_MAS3, r13
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 91a3be14808b..2669f80b3a49 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info
static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
struct arch_hw_breakpoint **info, int *hit,
- struct ppc_inst instr)
+ ppc_inst_t instr)
{
int i;
int stepped;
@@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args)
int hit[HBP_NUM_MAX] = {0};
int nr_hit = 0;
bool ptrace_bp = false;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
index 42b967e3d85c..a74623025f3a 100644
--- a/arch/powerpc/kernel/hw_breakpoint_constraints.c
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
* Return true if the event is valid wrt dawr configuration,
* including extraneous exception. Otherwise return false.
*/
-bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
unsigned long ea, int type, int size,
struct arch_hw_breakpoint *info)
{
@@ -127,7 +127,7 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,
return false;
}
-void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
int *type, int *size, unsigned long *ea)
{
struct instruction_op op;
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index 1f835539fda4..4ad79eb638c6 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -82,7 +82,7 @@ void power4_idle(void)
return;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- asm volatile("DSSALL ; sync" ::: "memory");
+ asm volatile(PPC_DSSALL " ; sync" ::: "memory");
power4_idle_nap();
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 13cad9297d82..3c097356366b 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -129,7 +129,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
mtspr SPRN_HID0,r4
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 835b626cd476..7cd6ce3ec423 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -81,7 +81,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
{
syscall_fn f;
- kuep_lock();
+ kuap_lock();
regs->orig_gpr3 = r3;
@@ -148,7 +148,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
*/
if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
- current_thread_info()->flags |= _TIF_RESTOREALL;
+ set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
/*
* If the system call was made with a transaction active, doom it and
@@ -181,7 +181,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
local_irq_enable();
- if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
+ if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
if (unlikely(trap_is_unsupported_scv(regs))) {
/* Unsupported scv vector */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
@@ -343,7 +343,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
unsigned long ti_flags;
again:
- ti_flags = READ_ONCE(current_thread_info()->flags);
+ ti_flags = read_thread_flags();
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
if (ti_flags & _TIF_NEED_RESCHED) {
@@ -359,7 +359,7 @@ again:
do_notify_resume(regs, ti_flags);
}
local_irq_disable();
- ti_flags = READ_ONCE(current_thread_info()->flags);
+ ti_flags = read_thread_flags();
}
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
@@ -406,7 +406,6 @@ again:
/* Restore user access locks last */
kuap_user_restore(regs);
- kuep_unlock();
return ret;
}
@@ -437,7 +436,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
/* Check whether the syscall is issued inside a restartable sequence */
rseq_syscall(regs);
- ti_flags = current_thread_info()->flags;
+ ti_flags = read_thread_flags();
if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
@@ -532,8 +531,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
unsigned long flags;
unsigned long ret = 0;
unsigned long kuap;
- bool stack_store = current_thread_info()->flags &
- _TIF_EMULATE_STACK_STORE;
+ bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;
if (regs_is_unrecoverable(regs))
unrecoverable_exception(regs);
@@ -554,7 +552,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
again:
if (IS_ENABLED(CONFIG_PREEMPT)) {
/* Return to preemptible kernel context */
- if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) {
+ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
if (preempt_count() == 0)
preempt_schedule_irq();
}
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index ec950b08a8dc..7bab2d7de372 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -30,21 +30,25 @@ COMPAT_SYS_CALL_TABLE:
.ifc \srr,srr
mfspr r11,SPRN_SRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
+ clrrdi r12,r12,2
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_SRR1
ld r12,_MSR(r1)
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.else
mfspr r11,SPRN_HSRR0
ld r12,_NIP(r1)
+ clrrdi r11,r11,2
+ clrrdi r12,r12,2
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
mfspr r11,SPRN_HSRR1
ld r12,_MSR(r1)
100: tdne r11,r12
- EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
.endif
#endif
.endm
@@ -162,10 +166,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
RFSCV_TO_USER
b . /* prevent speculative execution */
@@ -183,9 +186,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtctr r3
mtlr r4
mtspr SPRN_XER,r5
- REST_10GPRS(2, r1)
- REST_2GPRS(12, r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 13, r1)
+ REST_GPR(1, r1)
RFI_TO_USER
.Lsyscall_vectored_\name\()_rst_end:
@@ -374,10 +376,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* The value of AMR only matters while we're in the kernel.
*/
mtcr r2
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r13,GPR13(r1)
- ld r1,GPR1(r1)
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
RFI_TO_USER
b . /* prevent speculative execution */
@@ -388,8 +389,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mtctr r3
mtspr SPRN_XER,r4
ld r0,GPR0(r1)
- REST_8GPRS(4, r1)
- ld r12,GPR12(r1)
+ REST_GPRS(4, 12, r1)
b .Lsyscall_restore_regs_cont
.Lsyscall_rst_end:
@@ -518,17 +518,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r6,_XER(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
- REST_GPR(13, r1)
+ REST_GPRS(7, 13, r1)
mtcr r3
mtlr r4
mtctr r5
mtspr SPRN_XER,r6
- REST_4GPRS(2, r1)
- REST_GPR(6, r1)
+ REST_GPRS(2, 6, r1)
REST_GPR(0, r1)
REST_GPR(1, r1)
.ifc \srr,srr
@@ -625,8 +622,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
ld r6,_CCR(r1)
li r0,0
- REST_4GPRS(7, r1)
- REST_2GPRS(11, r1)
+ REST_GPRS(7, 12, r1)
mtlr r3
mtctr r4
@@ -638,7 +634,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
*/
std r0,STACK_FRAME_OVERHEAD-16(r1)
- REST_4GPRS(2, r1)
+ REST_GPRS(2, 5, r1)
bne- cr1,1f /* emulate stack store */
mtcr r6
@@ -703,7 +699,7 @@ interrupt_return_macro hsrr
.globl __end_soft_masked
__end_soft_masked:
-DEFINE_FIXED_SYMBOL(__end_soft_masked)
+DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3S
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c4f1d6b7d992..2cf31a97126c 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs)
irq = ppc_md.get_irq();
/* We can hard enable interrupts now to allow perf interrupts */
- may_hard_irq_enable();
+ if (should_hard_irq_enable())
+ do_hard_irq_enable();
/* And finally process it */
if (unlikely(!irq))
@@ -811,7 +812,7 @@ void __init init_IRQ(void)
ppc_md.init_IRQ();
}
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void *critirq_ctx[NR_CPUS] __read_mostly;
void *dbgirq_ctx[NR_CPUS] __read_mostly;
void *mcheckirq_ctx[NR_CPUS] __read_mostly;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index bdee7262c080..9f8d0fa7b718 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -48,7 +48,7 @@ static struct hard_trap_info
{ 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
{ 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#ifdef CONFIG_BOOKE_OR_40x
{ 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
#if defined(CONFIG_FSL_BOOKE)
{ 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
@@ -67,7 +67,7 @@ static struct hard_trap_info
{ 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
#endif
-#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+#else /* !CONFIG_BOOKE_OR_40x */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 86d77ff056a6..9a492fdec1df 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *prev;
- struct ppc_inst insn = ppc_inst_read(p->addr);
+ ppc_inst_t insn = ppc_inst_read(p->addr);
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
@@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);
static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
int ret;
- struct ppc_inst insn = ppc_inst_read(p->ainsn.insn);
+ ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);
/* regs->nip is also adjusted if emulate_step returns 1 */
ret = emulate_step(regs, insn);
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 225511d73bef..f2e03ed423d0 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
/* Stop DST streams */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
isync
/* Stop DST streams */
- DSSALL
+ PPC_DSSALL
sync
/* Get the current enable bit of the L3CR into r4 */
@@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
_GLOBAL(__flush_disable_L1)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd829f7f25a4..2503dd4713b9 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Display faulty slb contents for SLB errors. */
if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
slb_dump_contents(local_paca->mce_faulty_slbs);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c2f55fe7092d..71e8f2a92e36 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -77,15 +77,15 @@ static bool mce_in_guest(void)
}
/* flush SLBs and reload */
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
void flush_and_reload_slb(void)
{
- /* Invalidate all SLBs */
- slb_flush_all_realmode();
-
if (early_radix_enabled())
return;
+ /* Invalidate all SLBs */
+ slb_flush_all_realmode();
+
/*
* This probably shouldn't happen, but it may be possible it's
* called in early boot before SLB shadows are allocated.
@@ -99,7 +99,7 @@ void flush_and_reload_slb(void)
void flush_erat(void)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
flush_and_reload_slb();
return;
@@ -114,7 +114,7 @@ void flush_erat(void)
static int mce_flush(int what)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
return 1;
@@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
* in real-mode is tricky and can lead to recursive
* faults
*/
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned long pfn, instr_addr;
struct instruction_op op;
struct pt_regs tmp = *regs;
@@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (local_paca->in_mce == 1)
slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index ed04a3ba66fe..40a583e9d3c7 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr,
}
static __always_inline void *
-__module_alloc(unsigned long size, unsigned long start, unsigned long end)
+__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)
{
pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+ gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);
/*
* Don't do huge page allocations for modules yet until more testing
* is done. STRICT_MODULE_RWX may require extra work to support this
* too.
*/
- return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot,
+ return __vmalloc_node_range(size, 1, start, end, gfp, prot,
VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
NUMA_NO_NODE, __builtin_return_address(0));
}
@@ -114,13 +115,13 @@ void *module_alloc(unsigned long size)
/* First try within 32M limit from _etext to avoid branch trampolines */
if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit)
- ptr = __module_alloc(size, limit, MODULES_END);
+ ptr = __module_alloc(size, limit, MODULES_END, true);
if (!ptr)
- ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
+ ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);
return ptr;
#else
- return __module_alloc(size, VMALLOC_START, VMALLOC_END);
+ return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);
#endif
}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index f417afc08d33..a491ad481d85 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -273,6 +273,31 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
}
#ifdef CONFIG_DYNAMIC_FTRACE
+int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
+{
+ unsigned int jmp[4];
+
+ /* Find where the trampoline jumps to */
+ if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp)))
+ return -EFAULT;
+
+ /* verify that this is what we expect it to be */
+ if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) ||
+ (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) ||
+ jmp[2] != PPC_RAW_MTCTR(_R12) ||
+ jmp[3] != PPC_RAW_BCTR())
+ return -EINVAL;
+
+ addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16);
+ if (addr & 0x8000)
+ addr -= 0x10000;
+
+ *target = addr;
+
+ return 0;
+}
+
int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
{
module->arch.tramp = do_plt_call(module->core_layout.base,
@@ -281,6 +306,14 @@ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
if (!module->arch.tramp)
return -ENOENT;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ module->arch.tramp_regs = do_plt_call(module->core_layout.base,
+ (unsigned long)ftrace_regs_caller,
+ sechdrs, module);
+ if (!module->arch.tramp_regs)
+ return -ENOENT;
+#endif
+
return 0;
}
#endif
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 6baa676e7cb6..5d77d3f5fbb5 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -422,11 +422,17 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
const char *name)
{
long reladdr;
+ func_desc_t desc;
+ int i;
if (is_mprofile_ftrace_call(name))
return create_ftrace_stub(entry, addr, me);
- memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));
+ for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) {
+ if (patch_instruction(&entry->jump[i],
+ ppc_inst(ppc64_stub_insns[i])))
+ return 0;
+ }
/* Stub uses address relative to r2. */
reladdr = (unsigned long)entry - my_r2(sechdrs, me);
@@ -437,10 +443,24 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
}
pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
- entry->jump[0] |= PPC_HA(reladdr);
- entry->jump[1] |= PPC_LO(reladdr);
- entry->funcdata = func_desc(addr);
- entry->magic = STUB_MAGIC;
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
+ return 0;
+
+ if (patch_instruction(&entry->jump[1],
+ ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
+ return 0;
+
+ // func_desc_t is 8 bytes if ABIv2, else 16 bytes
+ desc = func_desc(addr);
+ for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) {
+ if (patch_instruction(((u32 *)&entry->funcdata) + i,
+ ppc_inst(((u32 *)(&desc))[i])))
+ return 0;
+ }
+
+ if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC)))
+ return 0;
return 1;
}
@@ -495,8 +515,11 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)
me->name, *instruction, instruction);
return 0;
}
+
/* ld r2,R2_STACK_OFFSET(r1) */
- *instruction = PPC_INST_LD_TOC;
+ if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC)))
+ return 0;
+
return 1;
}
@@ -636,9 +659,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
/* Only replace bits 2 through 26 */
- *(uint32_t *)location
- = (*(uint32_t *)location & ~0x03fffffc)
+ value = (*(uint32_t *)location & ~0x03fffffc)
| (value & 0x03fffffc);
+
+ if (patch_instruction((u32 *)location, ppc_inst(value)))
+ return -EFAULT;
+
break;
case R_PPC64_REL64:
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 3c8d9bbb51cf..0d9f9cd41e13 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = {
.write = nvram_pstore_write,
};
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
int rc = 0;
@@ -562,7 +562,7 @@ static int nvram_pstore_init(void)
return rc;
}
#else
-static int nvram_pstore_init(void)
+static int __init nvram_pstore_init(void)
{
return -1;
}
@@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p)
* Per the criteria passed via nvram_remove_partition(), should this
* partition be removed? 1=remove, 0=keep
*/
-static int nvram_can_remove_partition(struct nvram_partition *part,
+static int __init nvram_can_remove_partition(struct nvram_partition *part,
const char *name, int sig, const char *exceptions[])
{
if (part->header.signature != sig)
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index ce1903064031..3b1c2236cbee 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
{
- struct ppc_inst branch_op_callback, branch_emulate_step, temp;
+ ppc_inst_t branch_op_callback, branch_emulate_step, temp;
unsigned long op_callback_addr, emulate_step_addr;
kprobe_opcode_t *buff;
long b_offset;
@@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/*
* 3. load instruction to be emulated into relevant register, and
*/
- if (IS_ENABLED(CONFIG_PPC64)) {
- temp = ppc_inst_read(p->ainsn.insn);
- patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
- } else {
- patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX);
- }
+ temp = ppc_inst_read(p->ainsn.insn);
+ patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
/*
* 4. branch back from trampoline
@@ -269,7 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
void arch_optimize_kprobes(struct list_head *oplist)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index 19ea3312403c..5c7f0b4b784b 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -10,8 +10,8 @@
#include <asm/asm-offsets.h>
#ifdef CONFIG_PPC64
-#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base)
-#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base)
+#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base)
+#define REST_30GPRS(base) REST_GPRS(2, 31, base)
#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop
#else
#define SAVE_30GPRS(base) stmw r2, GPR2(base)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 4208b4044d12..39da688a9455 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
}
#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_PPC_BOOK3S_64
-
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
@@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
return s;
}
-
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
#ifdef CONFIG_PPC_PSERIES
/**
@@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
new_paca->slb_shadow_ptr = NULL;
#endif
@@ -307,7 +305,7 @@ void __init allocate_paca(int cpu)
#ifdef CONFIG_PPC_PSERIES
paca->lppaca_ptr = new_lppaca(cpu, limit);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
#endif
#ifdef CONFIG_PPC_PSERIES
@@ -328,7 +326,7 @@ void __init free_unused_pacas(void)
paca_nr_cpu_ids = nr_cpu_ids;
paca_ptrs_size = new_ptrs_size;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
if (early_radix_enabled()) {
/* Ugly fixup, see new_slb_shadow() */
memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
@@ -341,9 +339,9 @@ void __init free_unused_pacas(void)
paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
void copy_mm_to_paca(struct mm_struct *mm)
{
-#ifdef CONFIG_PPC_BOOK3S
mm_context_t *context = &mm->context;
#ifdef CONFIG_PPC_MM_SLICES
@@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
#endif
-#else /* !CONFIG_PPC_BOOK3S */
- return;
-#endif
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 6749905932f4..8bc9cf62cd93 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL(isa_mem_base);
static const struct dma_map_ops *pci_dma_ops;
-void set_pci_dma_ops(const struct dma_map_ops *dma_ops)
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index b49e1060a3bf..48537964fba1 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -37,7 +37,7 @@ int pcibios_assign_bus_offset = 1;
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(pci_dram_offset);
-void pcibios_make_OF_bus_map(void);
+void __init pcibios_make_OF_bus_map(void);
static void fixup_cpc710_pci64(struct pci_dev* dev);
static u8* pci_to_OF_bus_map;
@@ -109,7 +109,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
}
}
-void
+void __init
pcibios_make_OF_bus_map(void)
{
int i;
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index 877817471e3c..6a029f2378e1 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -25,7 +25,7 @@ static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes
loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos,
- PDE_DATA(file_inode(file)), PAGE_SIZE);
+ pde_data(file_inode(file)), PAGE_SIZE);
}
static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
@@ -34,7 +34,7 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
return -EINVAL;
remap_pfn_range(vma, vma->vm_start,
- __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT,
+ __pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
PAGE_SIZE, vma->vm_page_prot);
return 0;
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 406d7ee9e322..984813a4d5dc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs)
{
struct arch_hw_breakpoint null_brk = {0};
struct arch_hw_breakpoint *info;
- struct ppc_inst instr = ppc_inst(0);
+ ppc_inst_t instr = ppc_inst(0);
int type = 0;
int size = 0;
unsigned long ea;
@@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t)
#endif
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvmppc_save_user_regs(void)
+{
+ unsigned long usermsr;
+
+ if (!current->thread.regs)
+ return;
+
+ usermsr = current->thread.regs->msr;
+
+ if (usermsr & MSR_FP)
+ save_fpu(current);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(current);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (usermsr & MSR_TM) {
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
+
+void kvmppc_save_current_sprs(void)
+{
+ save_sprs(&current->thread);
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
static inline void restore_sprs(struct thread_struct *old_thread,
struct thread_struct *new_thread)
{
@@ -1206,7 +1240,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
{
struct thread_struct *new_thread, *old_thread;
struct task_struct *last;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct ppc64_tlb_batch *batch;
#endif
@@ -1215,7 +1249,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
WARN_ON(!irqs_disabled());
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
batch = this_cpu_ptr(&ppc64_tlb_batch);
if (batch->active) {
current_thread_info()->local_flags |= _TLF_LAZY_MMU;
@@ -1281,9 +1315,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
set_return_regs_changed(); /* _switch changes stack (and regs) */
-#ifdef CONFIG_PPC32
- kuap_assert_locked();
-#endif
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ kuap_assert_locked();
+
last = _switch(old_thread, new_thread);
/*
@@ -1294,6 +1328,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
*/
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
* This applies to a process that was context switched while inside
* arch_enter_lazy_mmu_mode(), to re-activate the batch that was
@@ -1305,6 +1340,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+#endif
/*
* Math facilities are masked out of the child MSR in copy_thread.
@@ -1655,7 +1691,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
@@ -1767,6 +1803,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
p->thread.kuap = KUAP_NONE;
#endif
+#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+ p->thread.pid = MMU_NO_CONTEXT;
+#endif
setup_ksp_vsid(p, sp);
@@ -2299,10 +2338,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
* the heap, we can put it above 1TB so it is backed by a 1TB
* segment. Otherwise the heap will be in the bottom 1TB
* which always uses 256MB segments and this may result in a
- * performance penalty. We don't need to worry about radix. For
- * radix, mmu_highuser_ssize remains unchanged from 256MB.
+ * performance penalty.
*/
- if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
+ if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 2e67588f6f6e..3d30d40a0e9c 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -231,7 +231,7 @@ static void __init check_cpu_pa_features(unsigned long node)
ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
}
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
@@ -402,7 +402,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
/* Use common scan routine to determine if this is the chosen node */
- if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
+ if (early_init_dt_scan_chosen(data) < 0)
return 0;
#ifdef CONFIG_PPC64
@@ -447,7 +447,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
*/
#ifdef CONFIG_SPARSEMEM
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
u64 max_mem = 1UL << (MAX_PHYSMEM_BITS);
@@ -458,7 +458,7 @@ static bool validate_mem_limit(u64 base, u64 *size)
return true;
}
#else
-static bool validate_mem_limit(u64 base, u64 *size)
+static bool __init validate_mem_limit(u64 base, u64 *size)
{
return true;
}
@@ -532,19 +532,18 @@ static int __init early_init_drmem_lmb(struct drmem_lmb *lmb,
}
#endif /* CONFIG_PPC_PSERIES */
-static int __init early_init_dt_scan_memory_ppc(unsigned long node,
- const char *uname,
- int depth, void *data)
+static int __init early_init_dt_scan_memory_ppc(void)
{
#ifdef CONFIG_PPC_PSERIES
- if (depth == 1 &&
- strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+ const void *fdt = initial_boot_params;
+ int node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
+
+ if (node > 0)
walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb);
- return 0;
- }
+
#endif
-
- return early_init_dt_scan_memory(node, uname, depth, data);
+
+ return early_init_dt_scan_memory();
}
/*
@@ -748,8 +747,8 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);
/* Scan memory nodes and rebuild MEMBLOCKs */
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
- of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ early_init_dt_scan_root();
+ early_init_dt_scan_memory_ppc();
parse_early_param();
@@ -857,8 +856,8 @@ void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
* mess the memblock.
*/
add_mem_to_memblock = 0;
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
- of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ early_init_dt_scan_root();
+ early_init_dt_scan_memory_ppc();
add_mem_to_memblock = 1;
if (size)
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 18b04b08b983..0ac5faacc909 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -672,7 +672,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname)
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static void add_string(char **str, const char *q)
+static void __init add_string(char **str, const char *q)
{
char *p = *str;
@@ -682,7 +682,7 @@ static void add_string(char **str, const char *q)
*str = p;
}
-static char *tohex(unsigned int x)
+static char *__init tohex(unsigned int x)
{
static const char digits[] __initconst = "0123456789abcdef";
static char result[9] __prombss;
@@ -728,7 +728,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
#define prom_islower(c) ('a' <= (c) && (c) <= 'z')
#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c))
-static unsigned long prom_strtoul(const char *cp, const char **endp)
+static unsigned long __init prom_strtoul(const char *cp, const char **endp)
{
unsigned long result = 0, base = 10, value;
@@ -753,7 +753,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)
return result;
}
-static unsigned long prom_memparse(const char *ptr, const char **retptr)
+static unsigned long __init prom_memparse(const char *ptr, const char **retptr)
{
unsigned long ret = prom_strtoul(ptr, retptr);
int shift = 0;
@@ -1786,7 +1786,7 @@ static void __init prom_close_stdin(void)
}
#ifdef CONFIG_PPC_SVM
-static int prom_rtas_hcall(uint64_t args)
+static int __init prom_rtas_hcall(uint64_t args)
{
register uint64_t arg1 asm("r3") = H_RTAS;
register uint64_t arg2 asm("r4") = args;
@@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
/* Check if the phy-handle property exists - bail if it does */
rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
- if (!rv)
+ if (rv <= 0)
return;
/*
@@ -3248,7 +3248,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
/*
* Perform the Enter Secure Mode ultracall.
*/
-static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt)
{
register unsigned long r3 asm("r3") = UV_ESM;
register unsigned long r4 asm("r4") = kbase;
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index 7c7093c17c45..c43f77e2ac31 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -260,8 +260,7 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
u32 flags;
- flags = READ_ONCE(current_thread_info()->flags) &
- (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
+ flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
if (flags) {
int rc = tracehook_report_syscall_entry(regs);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index ff80bbad22a5..733e6ef36758 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -492,8 +492,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
}
EXPORT_SYMBOL(rtas_call);
-/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status
- * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
+/**
+ * rtas_busy_delay_time() - From an RTAS status value, calculate the
+ * suggested delay time in milliseconds.
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 100000 - If @status is 9905.
+ * * 10000 - If @status is 9904.
+ * * 1000 - If @status is 9903.
+ * * 100 - If @status is 9902.
+ * * 10 - If @status is 9901.
+ * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but
+ * some callers depend on this behavior, and the worst outcome
+ * is that they will delay for longer than necessary.
+ * * 0 - If @status is not a busy or extended delay value.
*/
unsigned int rtas_busy_delay_time(int status)
{
@@ -513,17 +530,77 @@ unsigned int rtas_busy_delay_time(int status)
}
EXPORT_SYMBOL(rtas_busy_delay_time);
-/* For an RTAS busy status code, perform the hinted delay. */
-unsigned int rtas_busy_delay(int status)
+/**
+ * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Process context. May sleep or schedule.
+ *
+ * Return:
+ * * true - @status is RTAS_BUSY or an extended delay hint. The
+ * caller may assume that the CPU has been yielded if necessary,
+ * and that an appropriate delay for @status has elapsed.
+ * Generally the caller should reattempt the RTAS call which
+ * yielded @status.
+ *
+ * * false - @status is not @RTAS_BUSY nor an extended delay hint. The
+ * caller is responsible for handling @status.
+ */
+bool rtas_busy_delay(int status)
{
unsigned int ms;
+ bool ret;
- might_sleep();
- ms = rtas_busy_delay_time(status);
- if (ms && need_resched())
- msleep(ms);
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ ret = true;
+ ms = rtas_busy_delay_time(status);
+ /*
+ * The extended delay hint can be as high as 100 seconds.
+ * Surely any function returning such a status is either
+ * buggy or isn't going to be significantly slowed by us
+ * polling at 1HZ. Clamp the sleep time to one second.
+ */
+ ms = clamp(ms, 1U, 1000U);
+ /*
+ * The delay hint is an order-of-magnitude suggestion, not
+ * a minimum. It is fine, possibly even advantageous, for
+ * us to pause for less time than hinted. For small values,
+ * use usleep_range() to ensure we don't sleep much longer
+ * than actually needed.
+ *
+ * See Documentation/timers/timers-howto.rst for
+ * explanation of the threshold used here. In effect we use
+ * usleep_range() for 9900 and 9901, msleep() for
+ * 9902-9905.
+ */
+ if (ms <= 20)
+ usleep_range(ms * 100, ms * 1000);
+ else
+ msleep(ms);
+ break;
+ case RTAS_BUSY:
+ ret = true;
+ /*
+ * We should call again immediately if there's no other
+ * work to do.
+ */
+ cond_resched();
+ break;
+ default:
+ ret = false;
+ /*
+ * Not a busy or extended delay status; the caller should
+ * handle @status itself. Ensure we warn on misuses in
+ * atomic context regardless.
+ */
+ might_sleep();
+ break;
+ }
- return ms;
+ return ret;
}
EXPORT_SYMBOL(rtas_busy_delay);
@@ -809,13 +886,13 @@ void rtas_os_term(char *str)
/**
* rtas_activate_firmware() - Activate a new version of firmware.
*
+ * Context: This function may sleep.
+ *
* Activate a new version of partition firmware. The OS must call this
* after resuming from a partition hibernation or migration in order
* to maintain the ability to perform live firmware updates. It's not
* catastrophic for this method to be absent or to fail; just log the
* condition in that case.
- *
- * Context: This function may sleep.
*/
void rtas_activate_firmware(void)
{
@@ -890,11 +967,12 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
#endif /* CONFIG_PPC_PSERIES */
/**
- * Find a specific pseries error log in an RTAS extended event log.
+ * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
+ * extended event log.
* @log: RTAS error/event log
* @section_id: two character section identifier
*
- * Returns a pointer to the specified errorlog or NULL if not found.
+ * Return: A pointer to the specified errorlog or NULL if not found.
*/
struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
uint16_t section_id)
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 32ee17753eb4..cf0f42909ddf 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -455,7 +455,7 @@ static void rtas_event_scan(struct work_struct *w)
}
#ifdef CONFIG_PPC64
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
unsigned int err_type ;
int rc ;
@@ -473,12 +473,12 @@ static void retrieve_nvram_error_log(void)
}
}
#else /* CONFIG_PPC64 */
-static void retrieve_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
}
#endif /* CONFIG_PPC64 */
-static void start_event_scan(void)
+static void __init start_event_scan(void)
{
printk(KERN_DEBUG "RTAS daemon started\n");
pr_debug("rtasd: will sleep for %d milliseconds\n",
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 15fb5ea1b9ea..e159d4093d98 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -44,7 +44,7 @@ static void enable_barrier_nospec(bool enable)
do_barrier_nospec_fixups(enable);
}
-void setup_barrier_nospec(void)
+void __init setup_barrier_nospec(void)
{
bool enable;
@@ -132,7 +132,7 @@ early_param("nospectre_v2", handle_nospectre_v2);
#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_FSL_BOOK3E
-void setup_spectre_v2(void)
+void __init setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
do_btb_flush_fixups();
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 4f1322b65760..f8da937df918 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -582,7 +582,7 @@ static __init int add_pcspkr(void)
device_initcall(add_pcspkr);
#endif /* CONFIG_PCSPKR_PLATFORM */
-void probe_machine(void)
+static __init void probe_machine(void)
{
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 84058bbc8fe9..93f22da12abe 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -29,7 +29,7 @@ void setup_tlb_core_data(void);
static inline void setup_tlb_core_data(void) { }
#endif
-#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void exc_lvl_early_init(void);
#else
static inline void exc_lvl_early_init(void) { }
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 7ec5c47fce0e..a6e9d36d7c01 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);
notrace void __init machine_init(u64 dt_ptr)
{
u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
- struct ppc_inst insn;
+ ppc_inst_t insn;
/* Configure static keys first, now that we're relocated. */
setup_feature_keys();
@@ -175,7 +175,7 @@ void __init emergency_stack_init(void)
}
#endif
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
void __init exc_lvl_early_init(void)
{
unsigned int i, hw_cpu;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 6052f5d5ded3..be8577ac9397 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -499,7 +499,7 @@ void smp_release_cpus(void)
* routines and/or provided to userland
*/
-static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
+static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
u32 bsize, u32 sets)
{
info->size = size;
@@ -771,50 +771,6 @@ void __init emergency_stack_init(void)
}
#ifdef CONFIG_SMP
-/**
- * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
- * @cpu: cpu to allocate for
- * @size: size allocation in bytes
- * @align: alignment
- *
- * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper
- * does the right thing for NUMA regardless of the current
- * configuration.
- *
- * RETURNS:
- * Pointer to the allocated area on success, NULL on failure.
- */
-static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
- size_t align)
-{
- const unsigned long goal = __pa(MAX_DMA_ADDRESS);
-#ifdef CONFIG_NUMA
- int node = early_cpu_to_node(cpu);
- void *ptr;
-
- if (!node_online(node) || !NODE_DATA(node)) {
- ptr = memblock_alloc_from(size, align, goal);
- pr_info("cpu %d has no node %d or node-local memory\n",
- cpu, node);
- pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
- cpu, size, __pa(ptr));
- } else {
- ptr = memblock_alloc_try_nid(size, align, goal,
- MEMBLOCK_ALLOC_ACCESSIBLE, node);
- pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
- "%016lx\n", cpu, size, node, __pa(ptr));
- }
- return ptr;
-#else
- return memblock_alloc_from(size, align, goal);
-#endif
-}
-
-static void __init pcpu_free_bootmem(void *ptr, size_t size)
-{
- memblock_free(ptr, size);
-}
-
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (early_cpu_to_node(from) == early_cpu_to_node(to))
@@ -823,53 +779,13 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)
return REMOTE_DISTANCE;
}
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static void __init pcpu_populate_pte(unsigned long addr)
+static __init int pcpu_cpu_to_node(int cpu)
{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
-
- p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- p4d_populate(&init_mm, p4d, new);
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pud_populate(&init_mm, pud, new);
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
- if (!new)
- goto err_alloc;
- pmd_populate_kernel(&init_mm, pmd, new);
- }
-
- return;
-
-err_alloc:
- panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n",
- __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ return early_cpu_to_node(cpu);
}
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
void __init setup_per_cpu_areas(void)
{
@@ -880,18 +796,27 @@ void __init setup_per_cpu_areas(void)
int rc = -EINVAL;
/*
- * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
- * to group units. For larger mappings, use 1M atom which
- * should be large enough to contain a number of units.
+ * BookE and BookS radix are historical values and should be revisited.
*/
- if (mmu_linear_psize == MMU_PAGE_4K)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E)) {
+ atom_size = SZ_1M;
+ } else if (radix_enabled()) {
atom_size = PAGE_SIZE;
- else
- atom_size = 1 << 20;
+ } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
+ /*
+ * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
+ * to group units. For larger mappings, use 1M atom which
+ * should be large enough to contain a number of units.
+ */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ atom_size = PAGE_SIZE;
+ else
+ atom_size = SZ_1M;
+ }
if (pcpu_chosen_fc != PCPU_FC_PAGE) {
rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_alloc_bootmem, pcpu_free_bootmem);
+ pcpu_cpu_to_node);
if (rc)
pr_warn("PERCPU: %s allocator failed (%d), "
"falling back to page size\n",
@@ -899,8 +824,7 @@ void __init setup_per_cpu_areas(void)
}
if (rc < 0)
- rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem,
- pcpu_populate_pte);
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 3e053e2fd6b6..d84c434b2b78 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -527,16 +527,20 @@ static long restore_user_regs(struct pt_regs *regs,
regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
#ifdef CONFIG_SPE
- /* force the process to reload the spe registers from
- current->thread when it next does spe instructions */
+ /*
+ * Force the process to reload the spe registers from
+ * current->thread when it next does spe instructions.
+ * Since this is user ABI, we must enforce the sizing.
+ */
+ BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
if (msr & MSR_SPE) {
/* restore spe registers from the stack */
- unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32), failed);
+ unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+ sizeof(current->thread.spe), failed);
current->thread.used_spe = true;
} else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+ memset(&current->thread.spe, 0, sizeof(current->thread.spe));
/* Always get SPEFSCR back */
unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c23ee842c4c3..b7fd6a72aa76 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -61,6 +61,7 @@
#include <asm/cpu_has_feature.h>
#include <asm/ftrace.h>
#include <asm/kup.h>
+#include <asm/fadump.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -621,6 +622,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
#endif
#ifdef CONFIG_NMI_IPI
+static void crash_stop_this_cpu(struct pt_regs *regs)
+#else
+static void crash_stop_this_cpu(void *dummy)
+#endif
+{
+ /*
+ * Just busy wait here and avoid marking CPU as offline to ensure
+ * register data is captured appropriately.
+ */
+ while (1)
+ cpu_relax();
+}
+
+void crash_smp_send_stop(void)
+{
+ static bool stopped = false;
+
+ /*
+ * In case of fadump, register data for all CPUs is captured by f/w
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
+ * this rtas call to avoid tricky post processing of those CPUs'
+ * backtraces.
+ */
+ if (should_fadump_crash())
+ return;
+
+ if (stopped)
+ return;
+
+ stopped = true;
+
+#ifdef CONFIG_NMI_IPI
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000);
+#else
+ smp_call_function(crash_stop_this_cpu, NULL, 0);
+#endif /* CONFIG_NMI_IPI */
+}
+
+#ifdef CONFIG_NMI_IPI
static void nmi_stop_this_cpu(struct pt_regs *regs)
{
/*
@@ -896,7 +936,8 @@ out:
return tg;
}
-static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start)
+static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
+ int cpu, int cpu_group_start)
{
int first_thread = cpu_first_thread_sibling(cpu);
int i;
@@ -1635,12 +1676,14 @@ void start_secondary(void *unused)
BUG();
}
+#ifdef CONFIG_PROFILING
int setup_profiling_timer(unsigned int multiplier)
{
return 0;
}
+#endif
-static void fixup_topology(void)
+static void __init fixup_topology(void)
{
int i;
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index f73f4d72fea4..e0cbd63007f2 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume)
#ifdef CONFIG_ALTIVEC
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
sync
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 96bb20715aa9..9f1903c7f540 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -141,7 +141,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
_GLOBAL(swsusp_arch_resume)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 7bef917cc84e..2600b4237292 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -528,3 +528,5 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
+450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 08d8072d6e7a..d45a415d5374 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -214,7 +214,7 @@ static ssize_t __used store_dscr_default(struct device *dev,
static DEVICE_ATTR(dscr_default, 0600,
show_dscr_default, store_dscr_default);
-static void sysfs_create_dscr_default(void)
+static void __init sysfs_create_dscr_default(void)
{
if (cpu_has_feature(CPU_FTR_DSCR)) {
int cpu;
@@ -744,12 +744,12 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char
}
static DEVICE_ATTR(svm, 0444, show_svm, NULL);
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
}
#else
-static void create_svm_file(void)
+static void __init create_svm_file(void)
{
}
#endif /* CONFIG_PPC_SVM */
@@ -1110,7 +1110,7 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
/* NUMA stuff */
#ifdef CONFIG_NUMA
-static void register_nodes(void)
+static void __init register_nodes(void)
{
int i;
@@ -1134,7 +1134,7 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
#else
-static void register_nodes(void)
+static void __init register_nodes(void)
{
return;
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index cae8f03a44fe..cd0b8b71ecdd 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = {
#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev);
@@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {
EXPORT_SYMBOL(decrementer_clockevent);
DEFINE_PER_CPU(u64, decrementers_next_tb);
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#define XSEC_PER_SEC (1024*1024)
@@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+ unsigned long x;
+
+ asm volatile("lbz %0,%1(13)"
+ : "=r" (x)
+ : "i" (offsetof(struct paca_struct, irq_work_pending)));
+ return x;
+}
+
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
@@ -539,13 +551,44 @@ void arch_irq_work_raise(void)
preempt_enable();
}
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+ /* We may have raced with new irq work */
+ if (unlikely(test_irq_work_pending()))
+ set_dec(1);
+}
+
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+}
#endif /* CONFIG_IRQ_WORK */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+ u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+ WARN_ON_ONCE(!arch_irqs_disabled());
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (now >= *next_tb) {
+ local_paca->irq_happened |= PACA_IRQ_DEC;
+ } else {
+ now = *next_tb - now;
+ if (now <= decrementer_max)
+ set_dec_or_work(now);
+ }
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
@@ -566,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
return;
}
- /* Ensure a positive value is written to the decrementer, or else
- * some CPUs will continue to take decrementer exceptions. When the
- * PPC_WATCHDOG (decrementer based) is configured, keep this at most
- * 31 bits, which is about 4 seconds on most systems, which gives
- * the watchdog a chance of catching timer interrupt hard lockups.
- */
- if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
- set_dec(0x7fffffff);
- else
- set_dec(decrementer_max);
-
- /* Conditionally hard-enable interrupts now that the DEC has been
- * bumped to its maximum value
- */
- may_hard_irq_enable();
+ /* Conditionally hard-enable interrupts. */
+ if (should_hard_irq_enable()) {
+ /*
+ * Ensure a positive value is written to the decrementer, or
+ * else some CPUs will continue to take decrementer exceptions.
+ * When the PPC_WATCHDOG (decrementer based) is configured,
+ * keep this at most 31 bits, which is about 4 seconds on most
+ * systems, which gives the watchdog a chance of catching timer
+ * interrupt hard lockups.
+ */
+ if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+ set_dec(0x7fffffff);
+ else
+ set_dec(decrementer_max);
+ do_hard_irq_enable();
+ }
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
@@ -605,11 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
__this_cpu_inc(irq_stat.timer_irqs_event);
} else {
now = *next_tb - now;
- if (now <= decrementer_max)
- set_dec(now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
@@ -730,7 +772,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
static void start_cpu_decrementer(void)
{
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE_OR_40x
unsigned int tcr;
/* Clear any pending timer interrupts */
@@ -843,11 +885,7 @@ static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
__this_cpu_write(decrementers_next_tb, get_tb() + evt);
- set_dec(evt);
-
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ set_dec_or_work(evt);
return 0;
}
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2b91f233b05d..3beecc32940b 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim)
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
- SAVE_4GPRS(3, r7) /* user r3-r6 */
- SAVE_GPR(8, r7) /* user r8 */
- SAVE_GPR(9, r7) /* user r9 */
- SAVE_GPR(10, r7) /* user r10 */
+ SAVE_GPRS(2, 6, r7) /* user r2-r6 */
+ SAVE_GPRS(8, 10, r7) /* user r8-r10 */
ld r3, GPR1(r1) /* user r1 */
ld r4, GPR7(r1) /* user r7 */
ld r5, GPR11(r1) /* user r11 */
@@ -445,12 +442,8 @@ restore_gprs:
ld r6, THREAD_TM_PPR(r3)
REST_GPR(0, r7) /* GPR0 */
- REST_2GPRS(2, r7) /* GPR2-3 */
- REST_GPR(4, r7) /* GPR4 */
- REST_4GPRS(8, r7) /* GPR8-11 */
- REST_2GPRS(12, r7) /* GPR12-13 */
-
- REST_NVGPRS(r7) /* GPR14-31 */
+ REST_GPRS(2, 4, r7) /* GPR2-4 */
+ REST_GPRS(8, 31, r7) /* GPR8-31 */
/* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index d89c5df4f206..80b6285769f2 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -41,10 +41,10 @@
#define NUM_FTRACE_TRAMPS 8
static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
-static struct ppc_inst
+static ppc_inst_t
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
@@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
}
static int
-ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
{
- struct ppc_inst replaced;
+ ppc_inst_t replaced;
/*
* Note:
@@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)
*/
static int test_24bit_addr(unsigned long ip, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
addr = ppc_function_entry((void *)addr);
/* use the create_branch to verify that this offset can be branched */
return create_branch(&op, (u32 *)ip, addr, 0) == 0;
}
-static int is_bl_op(struct ppc_inst op)
+static int is_bl_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000001;
}
-static int is_b_op(struct ppc_inst op)
+static int is_b_op(ppc_inst_t op)
{
return (ppc_inst_val(op) & 0xfc000003) == 0x48000000;
}
-static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op)
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
{
int offset;
@@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod,
{
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
- struct ppc_inst op, pop;
+ ppc_inst_t op, pop;
/* read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -221,10 +221,9 @@ static int
__ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
- unsigned int jmp[4];
+ ppc_inst_t op;
unsigned long ip = rec->ip;
- unsigned long tramp;
+ unsigned long tramp, ptr;
if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE))
return -EFAULT;
@@ -238,41 +237,13 @@ __ftrace_make_nop(struct module *mod,
/* lets find where the pointer goes */
tramp = find_bl_target(ip, op);
- /*
- * On PPC32 the trampoline looks like:
- * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
- * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
- * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
- * 0x4e, 0x80, 0x04, 0x20 bctr
- */
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
/* Find where the trampoline jumps to */
- if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) {
- pr_err("Failed to read %lx\n", tramp);
+ if (module_trampoline_target(mod, tramp, &ptr)) {
+ pr_err("Failed to get trampoline target\n");
return -EFAULT;
}
- pr_devel(" %08x %08x ", jmp[0], jmp[1]);
-
- /* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
- ((jmp[1] & 0xffff0000) != 0x398c0000) ||
- (jmp[2] != 0x7d8903a6) ||
- (jmp[3] != 0x4e800420)) {
- pr_err("Not a trampoline\n");
- return -EINVAL;
- }
-
- tramp = (jmp[1] & 0xffff) |
- ((jmp[0] & 0xffff) << 16);
- if (tramp & 0x8000)
- tramp -= 0x10000;
-
- pr_devel(" %lx ", tramp);
-
- if (tramp != addr) {
+ if (ptr != addr) {
pr_err("Trampoline location %08lx does not match addr\n",
tramp);
return -EINVAL;
@@ -291,7 +262,7 @@ __ftrace_make_nop(struct module *mod,
static unsigned long find_ftrace_tramp(unsigned long ip)
{
int i;
- struct ppc_inst instr;
+ ppc_inst_t instr;
/*
* We have the compiler generated long_branch tramps at the end
@@ -329,9 +300,9 @@ static int add_ftrace_tramp(unsigned long tramp)
static int setup_mcount_compiler_tramp(unsigned long tramp)
{
int i;
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ptr;
- struct ppc_inst instr;
+ ppc_inst_t instr;
static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
/* Is this a known long jump tramp? */
@@ -396,7 +367,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long tramp, ip = rec->ip;
- struct ppc_inst op;
+ ppc_inst_t op;
/* Read where this goes */
if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
@@ -436,7 +407,7 @@ int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -489,7 +460,7 @@ int ftrace_make_nop(struct module *mod,
*/
#ifndef CONFIG_MPROFILE_KERNEL
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/*
* We expect to see:
@@ -507,7 +478,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
}
#else
static int
-expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
+expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
{
/* look for patched "NOP" on ppc64 with -mprofile-kernel */
if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())))
@@ -519,8 +490,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)
static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op[2];
- struct ppc_inst instr;
+ ppc_inst_t op[2];
+ ppc_inst_t instr;
void *ip = (void *)rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -588,8 +559,10 @@ static int
__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
int err;
- struct ppc_inst op;
+ ppc_inst_t op;
u32 *ip = (u32 *)rec->ip;
+ struct module *mod = rec->arch.mod;
+ unsigned long tramp;
/* read where this goes */
if (copy_inst_from_kernel_nofault(&op, ip))
@@ -602,13 +575,23 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
}
/* If we never set up a trampoline to ftrace_caller, then bail */
- if (!rec->arch.mod->arch.tramp) {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+#else
+ if (!mod->arch.tramp) {
+#endif
pr_err("No ftrace trampoline\n");
return -EINVAL;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (rec->flags & FTRACE_FL_REGS)
+ tramp = mod->arch.tramp_regs;
+ else
+#endif
+ tramp = mod->arch.tramp;
/* create the branch to the trampoline */
- err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
+ err = create_branch(&op, ip, tramp, BRANCH_SET_LINK);
if (err) {
pr_err("REL24 out of range!\n");
return -EINVAL;
@@ -626,7 +609,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
void *ip = (void *)rec->ip;
unsigned long tramp, entry, ptr;
@@ -674,7 +657,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -713,7 +696,7 @@ static int
__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
- struct ppc_inst op;
+ ppc_inst_t op;
unsigned long ip = rec->ip;
unsigned long entry, ptr, tramp;
struct module *mod = rec->arch.mod;
@@ -807,7 +790,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
unsigned long ip = rec->ip;
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
/*
* If the calling address is more that 24 bits away,
@@ -847,7 +830,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
int ret;
old = ppc_inst_read((u32 *)&ftrace_call);
@@ -932,7 +915,7 @@ int ftrace_enable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, stub, 0);
new = ftrace_call_replace(ip, addr, 0);
@@ -945,7 +928,7 @@ int ftrace_disable_ftrace_graph_caller(void)
unsigned long ip = (unsigned long)(&ftrace_graph_call);
unsigned long addr = (unsigned long)(&ftrace_graph_caller);
unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- struct ppc_inst old, new;
+ ppc_inst_t old, new;
old = ftrace_call_replace(ip, addr, 0);
new = ftrace_call_replace(ip, stub, 0);
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index e023ae59c429..0a02c0cb12d9 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -9,55 +9,135 @@
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
#include <asm/export.h>
+#include <asm/ptrace.h>
_GLOBAL(mcount)
_GLOBAL(_mcount)
/*
* It is required that _mcount on PPC32 must preserve the
- * link register. But we have r0 to play with. We use r0
+ * link register. But we have r12 to play with. We use r12
* to push the return address back to the caller of mcount
* into the ctr register, restore the link register and
* then jump back using the ctr register.
*/
- mflr r0
- mtctr r0
- lwz r0, 4(r1)
+ mflr r12
+ mtctr r12
mtlr r0
bctr
+EXPORT_SYMBOL(_mcount)
_GLOBAL(ftrace_caller)
MCOUNT_SAVE_FRAME
/* r3 ends up with link register */
subi r3, r3, MCOUNT_INSN_SIZE
+ lis r5,function_trace_op@ha
+ lwz r5,function_trace_op@l(r5)
+ li r6, 0
.globl ftrace_call
ftrace_call:
bl ftrace_stub
nop
+ MCOUNT_RESTORE_FRAME
+ftrace_caller_common:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
b ftrace_graph_stub
_GLOBAL(ftrace_graph_stub)
#endif
- MCOUNT_RESTORE_FRAME
/* old link register ends up in ctr reg */
bctr
-EXPORT_SYMBOL(_mcount)
_GLOBAL(ftrace_stub)
blr
+_GLOBAL(ftrace_regs_caller)
+ /* Save the original return address in A's stack frame */
+ stw r0,LRSAVE(r1)
+
+ /* Create our stack frame + pt_regs */
+ stwu r1,-INT_FRAME_SIZE(r1)
+
+ /* Save all gprs to pt_regs */
+ stw r0, GPR0(r1)
+ stmw r2, GPR2(r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, INT_FRAME_SIZE
+ stw r8, GPR1(r1)
+
+ /* Load special regs for save below */
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ mfcr r11
+
+ /* Get the _mcount() call site out of LR */
+ mflr r7
+ /* Save it as pt_regs->nip */
+ stw r7, _NIP(r1)
+ /* Save the read LR in pt_regs->link */
+ stw r0, _LINK(r1)
+
+ lis r3,function_trace_op@ha
+ lwz r5,function_trace_op@l(r3)
+
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r7, MCOUNT_INSN_SIZE
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Save special regs */
+ stw r8, _MSR(r1)
+ stw r9, _CTR(r1)
+ stw r10, _XER(r1)
+ stw r11, _CCR(r1)
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1, STACK_FRAME_OVERHEAD
+
+ /* ftrace_call(r3, r4, r5, r6) */
+.globl ftrace_regs_call
+ftrace_regs_call:
+ bl ftrace_stub
+ nop
+
+ /* Load ctr with the possibly modified NIP */
+ lwz r3, _NIP(r1)
+ mtctr r3
+
+ /* Restore gprs */
+ lmw r2, GPR2(r1)
+
+ /* Restore possibly modified LR */
+ lwz r0, _LINK(r1)
+ mtlr r0
+
+ /* Pop our stack frame */
+ addi r1, r1, INT_FRAME_SIZE
+
+ b ftrace_caller_common
+
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+
addi r5, r1, 48
- /* load r4 with local address */
- lwz r4, 44(r1)
+ mfctr r4 /* ftrace_caller has moved local addr here */
+ stw r4, 44(r1)
+ mflr r3 /* ftrace_caller has restored LR from stack */
subi r4, r4, MCOUNT_INSN_SIZE
- /* Grab the LR out of the caller stack frame */
- lwz r3,52(r1)
-
bl prepare_ftrace_return
nop
@@ -66,9 +146,21 @@ _GLOBAL(ftrace_graph_caller)
* Change the LR in the callers stack frame to this.
*/
stw r3,52(r1)
+ mtlr r3
+ lwz r0,44(r1)
+ mtctr r0
+
+ lwz r3, 12(r1)
+ lwz r4, 16(r1)
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+
+ addi r1, r1, 48
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
bctr
_GLOBAL(return_to_handler)
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index f9fd5f743eba..d636fc755f60 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller)
/* Save all gprs to pt_regs */
SAVE_GPR(0, r1)
- SAVE_10GPRS(2, r1)
+ SAVE_GPRS(2, 11, r1)
/* Ok to continue? */
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
beq ftrace_no_trace
- SAVE_10GPRS(12, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_GPRS(12, 31, r1)
/* Save previous stack pointer (r1) */
addi r8, r1, SWITCH_FRAME_SIZE
@@ -108,10 +107,8 @@ ftrace_regs_call:
#endif
/* Restore gprs */
- REST_GPR(0,r1)
- REST_10GPRS(2,r1)
- REST_10GPRS(12,r1)
- REST_10GPRS(22,r1)
+ REST_GPR(0, r1)
+ REST_GPRS(2, 31, r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller)
stdu r1, -SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(3, r1)
+ SAVE_GPRS(3, 10, r1)
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
@@ -194,7 +191,7 @@ ftrace_call:
mtctr r3
/* Restore gprs */
- REST_8GPRS(3,r1)
+ REST_GPRS(3, 10, r1)
/* Restore callee's TOC */
ld r2, 24(r1)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11741703d26e..a08bb7cefdc5 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -245,7 +245,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
if (panic_on_oops)
panic("Fatal exception");
- do_exit(signr);
+ make_task_dead(signr);
}
NOKPROBE_SYMBOL(oops_end);
@@ -792,9 +792,9 @@ int machine_check_generic(struct pt_regs *regs)
void die_mce(const char *str, struct pt_regs *regs, long err)
{
/*
- * The machine check wants to kill the interrupted context, but
- * do_exit() checks for in_interrupt() and panics in that case, so
- * exit the irq/nmi before calling die.
+ * The machine check wants to kill the interrupted context,
+ * but make_task_dead() checks for in_interrupt() and panics
+ * in that case, so exit the irq/nmi before calling die.
*/
if (in_nmi())
nmi_exit();
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 8513aa49614e..d3942de254c6 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -84,7 +84,7 @@ static int udbg_uart_getc(void)
return udbg_uart_in(UART_RBR);
}
-static void udbg_use_uart(void)
+static void __init udbg_use_uart(void)
{
udbg_putc = udbg_uart_putc;
udbg_flush = udbg_uart_flush;
@@ -92,7 +92,7 @@ static void udbg_use_uart(void)
udbg_getc_poll = udbg_uart_getc_poll;
}
-void udbg_uart_setup(unsigned int speed, unsigned int clock)
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock)
{
unsigned int dll, base_bauds;
@@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock)
udbg_uart_out(UART_FCR, 0x7);
}
-unsigned int udbg_probe_uart_speed(unsigned int clock)
+unsigned int __init udbg_probe_uart_speed(unsigned int clock)
{
unsigned int dll, dlm, divisor, prescaler, speed;
u8 old_lcr;
@@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data)
outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));
}
-void udbg_uart_init_pio(unsigned long port, unsigned int stride)
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride)
{
if (!port)
return;
@@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data)
}
-void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
{
if (!addr)
return;
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index ae632569446f..fd9432875ebc 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x)
int emulate_altivec(struct pt_regs *regs)
{
- struct ppc_inst instr;
+ ppc_inst_t instr;
unsigned int i, word;
unsigned int va, vb, vc, vd;
vector128 *vrs;
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index ba03eedfdcd8..5cc24d8cce94 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state)
*/
_GLOBAL(load_up_altivec)
mfmsr r5 /* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_RI
+#endif
oris r5,r5,MSR_VEC@h
MTMSRD(r5) /* enable use of AltiVec now */
isync
@@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx)
andis. r5,r12,MSR_VEC@h
beql+ load_up_altivec /* skip if already loaded */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ li r5,MSR_RI
+ mtmsrd r5,1
+#endif
+
ld r4,PACACURRENT(r13)
addi r4,r4,THREAD /* Get THREAD */
li r6,1
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 18e42c74abdd..2bcca818136a 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -322,10 +322,6 @@ SECTIONS
#ifdef CONFIG_PPC32
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
-#ifdef CONFIG_UBSAN
- *(.data..Lubsan_data*)
- *(.data..Lubsan_type*)
-#endif
*(.data.rel*)
*(SDATA_MAIN)
*(.sdata2)
@@ -336,24 +332,18 @@ SECTIONS
#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
-#ifdef CONFIG_UBSAN
- *(.data..Lubsan_data*)
- *(.data..Lubsan_type*)
-#endif
*(.data.rel*)
*(.toc1)
*(.branch_lt)
}
- . = ALIGN(256);
- .got : AT(ADDR(.got) - LOAD_OFFSET) {
- __toc_start = .;
+ .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+ *(.got)
#ifndef CONFIG_RELOCATABLE
__prom_init_toc_start = .;
- arch/powerpc/kernel/prom_init.o*(.toc .got)
+ arch/powerpc/kernel/prom_init.o*(.toc)
__prom_init_toc_end = .;
#endif
- *(.got)
*(.toc)
}
#endif
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3fa6d240bade..bfc27496fe7e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -85,10 +85,37 @@ static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
static unsigned long __wd_smp_lock;
+static unsigned long __wd_reporting;
+static unsigned long __wd_nmi_output;
static cpumask_t wd_smp_cpus_pending;
static cpumask_t wd_smp_cpus_stuck;
static u64 wd_smp_last_reset_tb;
+/*
+ * Try to take the exclusive watchdog action / NMI IPI / printing lock.
+ * wd_smp_lock must be held. If this fails, we should return and wait
+ * for the watchdog to kick in again (or another CPU to trigger it).
+ *
+ * Importantly, if hardlockup_panic is set, wd_try_report failure should
+ * not delay the panic, because whichever other CPU is reporting will
+ * call panic.
+ */
+static bool wd_try_report(void)
+{
+ if (__wd_reporting)
+ return false;
+ __wd_reporting = 1;
+ return true;
+}
+
+/* End printing after successful wd_try_report. wd_smp_lock not required. */
+static void wd_end_reporting(void)
+{
+ smp_mb(); /* End printing "critical section" */
+ WARN_ON_ONCE(__wd_reporting == 0);
+ WRITE_ONCE(__wd_reporting, 0);
+}
+
static inline void wd_smp_lock(unsigned long *flags)
{
/*
@@ -128,109 +155,182 @@ static void wd_lockup_ipi(struct pt_regs *regs)
else
dump_stack();
+ /*
+ * __wd_nmi_output must be set after we printk from NMI context.
+ *
+ * printk from NMI context defers printing to the console to irq_work.
+ * If that NMI was taken in some code that is hard-locked, then irqs
+ * are disabled so irq_work will never fire. That can result in the
+ * hard lockup messages being delayed (indefinitely, until something
+ * else kicks the console drivers).
+ *
+ * Setting __wd_nmi_output will cause another CPU to notice and kick
+ * the console drivers for us.
+ *
+ * xchg is not needed here (it could be a smp_mb and store), but xchg
+ * gives the memory ordering and atomicity required.
+ */
+ xchg(&__wd_nmi_output, 1);
+
/* Do not panic from here because that can recurse into NMI IPI layer */
}
-static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
+static bool set_cpu_stuck(int cpu)
{
- cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
- cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
+ cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ /*
+ * See wd_smp_clear_cpu_pending()
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
+ return true;
}
-}
-static void set_cpu_stuck(int cpu, u64 tb)
-{
- set_cpumask_stuck(cpumask_of(cpu), tb);
+ return false;
}
-static void watchdog_smp_panic(int cpu, u64 tb)
+static void watchdog_smp_panic(int cpu)
{
+ static cpumask_t wd_smp_cpus_ipi; // protected by reporting
unsigned long flags;
+ u64 tb, last_reset;
int c;
wd_smp_lock(&flags);
/* Double check some things under lock */
- if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb)
+ tb = get_tb();
+ last_reset = wd_smp_last_reset_tb;
+ if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
goto out;
if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
goto out;
- if (cpumask_weight(&wd_smp_cpus_pending) == 0)
+ if (!wd_try_report())
+ goto out;
+ for_each_online_cpu(c) {
+ if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
+ continue;
+ if (c == cpu)
+ continue; // should not happen
+
+ __cpumask_set_cpu(c, &wd_smp_cpus_ipi);
+ if (set_cpu_stuck(c))
+ break;
+ }
+ if (cpumask_empty(&wd_smp_cpus_ipi)) {
+ wd_end_reporting();
goto out;
+ }
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
- cpu, cpumask_pr_args(&wd_smp_cpus_pending));
+ cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
- cpu, tb, wd_smp_last_reset_tb,
- tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000);
+ cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);
if (!sysctl_hardlockup_all_cpu_backtrace) {
/*
* Try to trigger the stuck CPUs, unless we are going to
* get a backtrace on all of them anyway.
*/
- for_each_cpu(c, &wd_smp_cpus_pending) {
- if (c == cpu)
- continue;
+ for_each_cpu(c, &wd_smp_cpus_ipi) {
smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ __cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
}
- }
-
- /* Take the stuck CPUs out of the watch group */
- set_cpumask_stuck(&wd_smp_cpus_pending, tb);
-
- wd_smp_unlock(&flags);
-
- if (sysctl_hardlockup_all_cpu_backtrace)
+ } else {
trigger_allbutself_cpu_backtrace();
-
- /*
- * Force flush any remote buffers that might be stuck in IRQ context
- * and therefore could not run their irq_work.
- */
- printk_trigger_flush();
+ cpumask_clear(&wd_smp_cpus_ipi);
+ }
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
+ wd_end_reporting();
+
return;
out:
wd_smp_unlock(&flags);
}
-static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
+static void wd_smp_clear_cpu_pending(int cpu)
{
if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
struct pt_regs *regs = get_irq_regs();
unsigned long flags;
- wd_smp_lock(&flags);
-
pr_emerg("CPU %d became unstuck TB:%lld\n",
- cpu, tb);
+ cpu, get_tb());
print_irqtrace_events(current);
if (regs)
show_regs(regs);
else
dump_stack();
+ wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
wd_smp_unlock(&flags);
+ } else {
+ /*
+ * The last CPU to clear pending should have reset the
+ * watchdog so we generally should not find it empty
+ * here if our CPU was clear. However it could happen
+ * due to a rare race with another CPU taking the
+ * last CPU out of the mask concurrently.
+ *
+ * We can't add a warning for it. But just in case
+ * there is a problem with the watchdog that is causing
+ * the mask to not be reset, try to kick it along here.
+ */
+ if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
+ goto none_pending;
}
return;
}
+
+ /*
+ * All other updates to wd_smp_cpus_pending are performed under
+ * wd_smp_lock. All of them are atomic except the case where the
+ * mask becomes empty and is reset. This will not happen here because
+ * cpu was tested to be in the bitmap (above), and a CPU only clears
+ * its own bit. _Except_ in the case where another CPU has detected a
+ * hard lockup on our CPU and takes us out of the pending mask. So in
+ * normal operation there will be no race here, no problem.
+ *
+ * In the lockup case, this atomic clear-bit vs a store that refills
+ * other bits in the accessed word wll not be a problem. The bit clear
+ * is atomic so it will not cause the store to get lost, and the store
+ * will never set this bit so it will not overwrite the bit clear. The
+ * only way for a stuck CPU to return to the pending bitmap is to
+ * become unstuck itself.
+ */
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+ /*
+ * Order the store to clear pending with the load(s) to check all
+ * words in the pending mask to check they are all empty. This orders
+ * with the same barrier on another CPU. This prevents two CPUs
+ * clearing the last 2 pending bits, but neither seeing the other's
+ * store when checking if the mask is empty, and missing an empty
+ * mask, which ends with a false positive.
+ */
+ smp_mb();
if (cpumask_empty(&wd_smp_cpus_pending)) {
unsigned long flags;
+none_pending:
+ /*
+ * Double check under lock because more than one CPU could see
+ * a clear mask with the lockless check after clearing their
+ * pending bits.
+ */
wd_smp_lock(&flags);
if (cpumask_empty(&wd_smp_cpus_pending)) {
- wd_smp_last_reset_tb = tb;
+ wd_smp_last_reset_tb = get_tb();
cpumask_andnot(&wd_smp_cpus_pending,
&wd_cpus_enabled,
&wd_smp_cpus_stuck);
@@ -245,10 +345,21 @@ static void watchdog_timer_interrupt(int cpu)
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
- watchdog_smp_panic(cpu, tb);
+ watchdog_smp_panic(cpu);
+
+ if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
+ /*
+ * Something has called printk from NMI context. It might be
+ * stuck, so this this triggers a flush that will get that
+ * printk output to the console.
+ *
+ * See wd_lockup_ipi.
+ */
+ printk_trigger_flush();
+ }
}
DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
@@ -267,12 +378,27 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+ /*
+ * Taking wd_smp_lock here means it is a soft-NMI lock, which
+ * means we can't take any regular or irqsafe spin locks while
+ * holding this lock. This is why timers can't printk while
+ * holding the lock.
+ */
wd_smp_lock(&flags);
if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
wd_smp_unlock(&flags);
return 0;
}
- set_cpu_stuck(cpu, tb);
+ if (!wd_try_report()) {
+ wd_smp_unlock(&flags);
+ /* Couldn't report, try again in 100ms */
+ mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
+ return 0;
+ }
+
+ set_cpu_stuck(cpu);
+
+ wd_smp_unlock(&flags);
pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
cpu, (void *)regs->nip);
@@ -283,14 +409,21 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
print_irqtrace_events(current);
show_regs(regs);
- wd_smp_unlock(&flags);
+ xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
if (sysctl_hardlockup_all_cpu_backtrace)
trigger_allbutself_cpu_backtrace();
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
+
+ wd_end_reporting();
}
+ /*
+ * We are okay to change DEC in soft_nmi_interrupt because the masked
+ * handler has marked a DEC as pending, so the timer interrupt will be
+ * replayed as soon as local irqs are enabled again.
+ */
if (wd_panic_timeout_tb < 0x7fffffff)
mtspr(SPRN_DEC, wd_panic_timeout_tb);
@@ -318,11 +451,15 @@ void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
- u64 tb = get_tb();
+ u64 tb;
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return;
+
+ tb = get_tb();
if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
per_cpu(wd_timer_tb, cpu) = tb;
- wd_smp_clear_cpu_pending(cpu, tb);
+ wd_smp_clear_cpu_pending(cpu);
}
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -380,7 +517,7 @@ static void stop_watchdog(void *arg)
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
- wd_smp_clear_cpu_pending(cpu, get_tb());
+ wd_smp_clear_cpu_pending(cpu);
}
static int stop_watchdog_on_cpu(unsigned int cpu)