about summary refs log tree commit diff stats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- arch/powerpc/kernel/Makefile 2
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c 11
-rw-r--r-- arch/powerpc/kernel/cpu_setup_6xx.S 2
-rw-r--r-- arch/powerpc/kernel/cpu_setup_fsl_booke.S 2
-rw-r--r-- arch/powerpc/kernel/cpu_setup_power.S 6
-rw-r--r-- arch/powerpc/kernel/cputable.c 59
-rw-r--r-- arch/powerpc/kernel/crash.c 2
-rw-r--r-- arch/powerpc/kernel/dma.c 3
-rw-r--r-- arch/powerpc/kernel/dt_cpu_ftrs.c 49
-rw-r--r-- arch/powerpc/kernel/eeh.c 33
-rw-r--r-- arch/powerpc/kernel/eeh_cache.c 3
-rw-r--r-- arch/powerpc/kernel/eeh_driver.c 205
-rw-r--r-- arch/powerpc/kernel/eeh_event.c 6
-rw-r--r-- arch/powerpc/kernel/eeh_pe.c 3
-rw-r--r-- arch/powerpc/kernel/entry_64.S 2
-rw-r--r-- arch/powerpc/kernel/exceptions-64s.S 105
-rw-r--r-- arch/powerpc/kernel/head_64.S 19
-rw-r--r-- arch/powerpc/kernel/hw_breakpoint.c 3
-rw-r--r-- arch/powerpc/kernel/idle_book3s.S 52
-rw-r--r-- arch/powerpc/kernel/iomap.c 40
-rw-r--r-- arch/powerpc/kernel/kexec_elf_64.c 11
-rw-r--r-- arch/powerpc/kernel/kprobes.c 30
-rw-r--r-- arch/powerpc/kernel/machine_kexec_64.c 37
-rw-r--r-- arch/powerpc/kernel/machine_kexec_file_64.c 41
-rw-r--r-- arch/powerpc/kernel/mce_power.c 7
-rw-r--r-- arch/powerpc/kernel/misc_64.S 38
-rw-r--r-- arch/powerpc/kernel/nvram_64.c 9
-rw-r--r-- arch/powerpc/kernel/paca.c 242
-rw-r--r-- arch/powerpc/kernel/pci-common.c 106
-rw-r--r-- arch/powerpc/kernel/process.c 27
-rw-r--r-- arch/powerpc/kernel/prom.c 19
-rw-r--r-- arch/powerpc/kernel/prom_init.c 29
-rw-r--r-- arch/powerpc/kernel/prom_init_check.sh 2
-rw-r--r-- arch/powerpc/kernel/ptrace.c 16
-rw-r--r-- arch/powerpc/kernel/rtas-proc.c 32
-rw-r--r-- arch/powerpc/kernel/security.c 237
-rw-r--r-- arch/powerpc/kernel/setup-common.c 37
-rw-r--r-- arch/powerpc/kernel/setup.h 9
-rw-r--r-- arch/powerpc/kernel/setup_32.c 8
-rw-r--r-- arch/powerpc/kernel/setup_64.c 126
-rw-r--r-- arch/powerpc/kernel/signal.h 5
-rw-r--r-- arch/powerpc/kernel/signal_32.c 4
-rw-r--r-- arch/powerpc/kernel/smp.c 58
-rw-r--r-- arch/powerpc/kernel/sysfs.c 20
-rw-r--r-- arch/powerpc/kernel/time.c 5
-rw-r--r-- arch/powerpc/kernel/traps.c 76
-rw-r--r-- arch/powerpc/kernel/vdso.c 12
-rw-r--r-- arch/powerpc/kernel/vmlinux.lds.S 36
48 files changed, 1128 insertions, 758 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2358f97d62ec..2b4c40b255e4 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
obj-$(CONFIG_PPC64) += vdso64/
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ea5eb91b836e..8817c5a6bcc2 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -13,6 +13,7 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -42,7 +43,6 @@
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/cache.h>
-#include <asm/compat.h>
#include <asm/mmu.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
@@ -221,12 +221,17 @@ int main(void)
OFFSET(PACA_EXMC, paca_struct, exmc);
OFFSET(PACA_EXSLB, paca_struct, exslb);
OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_PSERIES
OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
+#endif
OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+#endif
OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
@@ -557,6 +562,7 @@ int main(void)
OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+ OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
@@ -568,6 +574,7 @@ int main(void)
OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar);
OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar);
OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr);
+ OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr);
OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm);
OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr);
OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr);
@@ -650,6 +657,7 @@ int main(void)
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
HSTATE_FIELD(HSTATE_TID, tid);
+ HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -759,6 +767,7 @@ int main(void)
OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+ OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
STOP_SPR(STOP_PID, pid);
STOP_SPR(STOP_LDBAR, ldbar);
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index c5e5a94d9892..a9f3970693e1 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -226,7 +226,7 @@ BEGIN_FTR_SECTION
beq 1f
END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
lwz r6,CPU_SPEC_FEATURES(r4)
- andi. r0,r6,CPU_FTR_L3_DISABLE_NAP
+ andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h
beq 1f
li r7,CPU_FTR_CAN_NAP
andc r6,r6,r7
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 462aed9bcf51..8d142e5d84cd 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500)
* the feature on the primary core, avoid doing it on the
* secondary core.
*/
- andis. r6, r3, CPU_FTR_EMB_HV@h
+ andi. r6, r3, CPU_FTR_EMB_HV
beq 2f
rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
stw r3, CPU_SPEC_FEATURES(r4)
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3f30c994e931..458b928dbd84 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
@@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
bl __init_LPCR_ISA206
@@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
li r4,0 /* LPES = 0 */
@@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
li r4,0 /* LPES = 0 */
@@ -99,6 +103,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
or r3, r3, r4
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index c40a9fc1e5d1..c8fc9691f8c7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -133,36 +133,6 @@ extern void __restore_cpu_e6500(void);
static struct cpu_spec __initdata cpu_specs[] = {
#ifdef CONFIG_PPC_BOOK3S_64
- { /* Power4 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00350000,
- .cpu_name = "POWER4 (gp)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
- { /* Power4+ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00380000,
- .cpu_name = "POWER4+ (gq)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
{ /* PPC970 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00390000,
@@ -553,11 +523,30 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p9,
.platform = "power9",
},
- { /* Power9 DD 2.1 or later (see DD2.0 above) */
+ { /* Power9 DD 2.1 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0201,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power9",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD2.2 or later */
.pvr_mask = 0xffff0000,
.pvr_value = 0x004e0000,
.cpu_name = "POWER9 (raw)",
- .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_features = CPU_FTRS_POWER9_DD2_2,
.cpu_user_features = COMMON_USER_POWER9,
.cpu_user_features2 = COMMON_USER2_POWER9,
.mmu_features = MMU_FTRS_POWER9,
@@ -609,15 +598,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
- .cpu_name = "POWER4 (compatible)",
+ .cpu_name = "POWER5 (compatible)",
.cpu_features = CPU_FTRS_COMPATIBLE,
.cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
+ .mmu_features = MMU_FTRS_POWER,
.icache_bsize = 128,
.dcache_bsize = 128,
.num_pmcs = 6,
.pmc_type = PPC_PMC_IBM,
- .platform = "power4",
+ .platform = "power5",
}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 00b215125d3e..17c8b99680f2 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -238,7 +238,7 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu)
if (i == cpu)
continue;
- while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+ while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
barrier();
if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
break;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index da20569de9d4..138157deeadf 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -309,8 +309,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_set_coherent_mask);
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
int dma_set_mask(struct device *dev, u64 dma_mask)
{
if (ppc_md.dma_set_mask)
@@ -361,7 +359,6 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask);
static int __init dma_init(void)
{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
dma_debug_add_bus(&pci_bus_type);
#endif
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 8ca5d5b74618..c904477abaf3 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -53,19 +53,6 @@ struct dt_cpu_feature {
int disabled;
};
-#define CPU_FTRS_BASE \
- (CPU_FTR_USE_TB | \
- CPU_FTR_LWSYNC | \
- CPU_FTR_FPU_UNAVAILABLE |\
- CPU_FTR_NODSISRALIGN |\
- CPU_FTR_NOEXECUTE |\
- CPU_FTR_COHERENT_ICACHE | \
- CPU_FTR_STCX_CHECKS_ADDRESS |\
- CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_DAWR | \
- CPU_FTR_ARCH_206 |\
- CPU_FTR_ARCH_207S)
-
#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
#define COMMON_USER_BASE (PPC_FEATURE_32 | PPC_FEATURE_64 | \
@@ -84,6 +71,7 @@ static int hv_mode;
static struct {
u64 lpcr;
+ u64 lpcr_clear;
u64 hfscr;
u64 fscr;
} system_registers;
@@ -92,6 +80,8 @@ static void (*init_pmu_registers)(void);
static void __restore_cpu_cpufeatures(void)
{
+ u64 lpcr;
+
/*
* LPCR is restored by the power on engine already. It can be changed
* after early init e.g., by radix enable, and we have no unified API
@@ -104,11 +94,14 @@ static void __restore_cpu_cpufeatures(void)
* The best we can do to accommodate secondary boot and idle restore
* for now is "or" LPCR with existing.
*/
-
- mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= system_registers.lpcr;
+ lpcr &= ~system_registers.lpcr_clear;
+ mtspr(SPRN_LPCR, lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
+ mtspr(SPRN_PCR, 0);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -120,7 +113,7 @@ static char dt_cpu_name[64];
static struct cpu_spec __initdata base_cpu_spec = {
.cpu_name = NULL,
- .cpu_features = CPU_FTRS_BASE,
+ .cpu_features = CPU_FTRS_DT_CPU_BASE,
.cpu_user_features = COMMON_USER_BASE,
.cpu_user_features2 = COMMON_USER2_BASE,
.mmu_features = 0,
@@ -325,8 +318,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
{
u64 lpcr;
+ system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
lpcr = mfspr(SPRN_LPCR);
- lpcr &= ~LPCR_ISL;
+ lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
mtspr(SPRN_LPCR, lpcr);
cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
@@ -590,6 +584,8 @@ static struct dt_cpu_feature_match __initdata
{"virtual-page-class-key-protection", feat_enable, 0},
{"transactional-memory", feat_enable_tm, CPU_FTR_TM},
{"transactional-memory-v3", feat_enable_tm, 0},
+ {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
+ {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
{"idle-nap", feat_enable_idle_nap, 0},
{"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
{"idle-stop", feat_enable_idle_stop, 0},
@@ -707,11 +703,28 @@ static __init void cpufeatures_cpu_quirks(void)
*/
if ((version & 0xffffff00) == 0x004e0100)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+ else if ((version & 0xffffefff) == 0x004e0200)
+ ; /* DD2.0 has no feature flag */
else if ((version & 0xffffefff) == 0x004e0201)
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ else if ((version & 0xffffefff) == 0x004e0202) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+ } else /* DD2.1 and up have DD2_1 */
+ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- if ((version & 0xffff0000) == 0x004e0000)
+ if ((version & 0xffff0000) == 0x004e0000) {
+ cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+ }
+
+ /*
+ * PKEY was not in the initial base or feature node
+ * specification, but it should become optional in the next
+ * cpu feature version sequence.
+ */
+ cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
}
static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 2b9df0040d6b..90bb39b1a23c 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* Check PHB state */
ret = eeh_ops->get_state(phb_pe, NULL);
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
- (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
ret = 0;
goto out;
}
@@ -433,7 +431,6 @@ out:
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
@@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* state, PE is in good state.
*/
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- ((ret & active_flags) == active_flags)) {
+ (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
@@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Frozen parent PE ? */
ret = eeh_ops->get_state(parent_pe, NULL);
- if (ret > 0 &&
- (ret & active_flags) != active_flags)
+ if (ret > 0 && !eeh_state_active(ret))
pe = parent_pe;
/* Next parent level */
@@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag)
*/
int eeh_pe_reset_full(struct eeh_pe *pe)
{
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
int type = EEH_RESET_HOT;
unsigned int freset = 0;
@@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
/* Wait until the PE is in a functioning state */
state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if ((state & active_flags) == active_flags)
+ if (eeh_state_active(state))
break;
if (state < 0) {
@@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
struct eeh_dev *edev, *tmp;
struct pci_dev *pdev;
struct pci_device_id *id;
- int flags, ret;
+ int ret;
/* Check PE state */
- flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
ret = eeh_ops->get_state(pe, NULL);
if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
return 0;
/* Unfrozen PE, nothing to do */
- if ((ret & flags) == flags)
+ if (eeh_state_active(ret))
return 0;
/* Frozen PE, check if it needs PE level reset */
@@ -1782,18 +1775,6 @@ static int proc_eeh_show(struct seq_file *m, void *v)
return 0;
}
-static int proc_eeh_open(struct inode *inode, struct file *file)
-{
- return single_open(file, proc_eeh_show, NULL);
-}
-
-static const struct file_operations proc_eeh_operations = {
- .open = proc_eeh_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
@@ -1835,7 +1816,7 @@ DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
static int __init eeh_init_proc(void)
{
if (machine_is(pseries) || machine_is(powernv)) {
- proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+ proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
debugfs_create_file("eeh_enable", 0600,
powerpc_debugfs_root, NULL,
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index d4cc26618809..201943d54a6e 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
* @addr: mmio (PIO) phys address or i/o port number
*
* Given an mmio phys address, or a port number, find a pci device
- * that implements this address. Be sure to pci_dev_put the device
- * when finished. I/O port numbers are assumed to be offset
+ * that implements this address. I/O port numbers are assumed to be offset
* from zero (that is, they do *not* have pci_io_addr added in).
* It is safe to call this function within an interrupt.
*/
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 0c0b66fc5bfb..b8a329f04814 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_frozen;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ !driver->err_handler->error_detected)
+ goto out;
rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
@@ -227,8 +227,12 @@ static void *eeh_report_error(void *data, void *userdata)
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
edev->in_error = true;
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+
+out:
+ eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -251,15 +255,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+ device_lock(&dev->dev);
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
if (!driver->err_handler ||
!driver->err_handler->mmio_enabled ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ (edev->mode & EEH_DEV_NO_HANDLER))
+ goto out;
rc = driver->err_handler->mmio_enabled(dev);
@@ -267,7 +270,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+out:
eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -290,20 +296,20 @@ static void *eeh_report_reset(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_enable_irq(dev);
if (!driver->err_handler ||
!driver->err_handler->slot_reset ||
(edev->mode & EEH_DEV_NO_HANDLER) ||
- (!edev->in_error)) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ (!edev->in_error))
+ goto out;
rc = driver->err_handler->slot_reset(dev);
if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -311,7 +317,10 @@ static void *eeh_report_reset(void *data, void *userdata)
if (*res == PCI_ERS_RESULT_DISCONNECT &&
rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+out:
eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -362,10 +371,12 @@ static void *eeh_report_resume(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
was_in_error = edev->in_error;
edev->in_error = false;
@@ -375,18 +386,20 @@ static void *eeh_report_resume(void *data, void *userdata)
!driver->err_handler->resume ||
(edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
edev->mode &= ~EEH_DEV_NO_HANDLER;
- eeh_pcid_put(dev);
- return NULL;
+ goto out;
}
driver->err_handler->resume(dev);
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+out:
+ eeh_pcid_put(dev);
#ifdef CONFIG_PCI_IOV
if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
#endif
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -406,23 +419,26 @@ static void *eeh_report_failure(void *data, void *userdata)
if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
return NULL;
+
+ device_lock(&dev->dev);
dev->error_state = pci_channel_io_perm_failure;
driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver) goto out_no_dev;
eeh_disable_irq(dev);
if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
- return NULL;
- }
+ !driver->err_handler->error_detected)
+ goto out;
driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
- eeh_pcid_put(dev);
pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+out:
+ eeh_pcid_put(dev);
+out_no_dev:
+ device_unlock(&dev->dev);
return NULL;
}
@@ -619,17 +635,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
/**
* eeh_reset_device - Perform actual reset of a pci slot
+ * @driver_eeh_aware: Does the device's driver provide EEH support?
* @pe: EEH PE
* @bus: PCI bus corresponding to the isolcated slot
+ * @rmv_data: Optional, list to record removed devices
*
* This routine must be called to do reset on the indicated PE.
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
- struct eeh_rmv_data *rmv_data)
+ struct eeh_rmv_data *rmv_data,
+ bool driver_eeh_aware)
{
- struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
time64_t tstamp;
int cnt, rc;
struct eeh_dev *edev;
@@ -645,16 +663,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (bus) {
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- } else {
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- }
- } else if (frozen_bus) {
+ if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+ } else {
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
}
/*
@@ -689,8 +703,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* the device up before the scripts have taken it down,
* potentially weird things happen.
*/
- if (bus) {
- pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+ if (!driver_eeh_aware || rmv_data->removed) {
+ pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+ (driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
/*
@@ -703,19 +718,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
if (pe->type & EEH_PE_VF) {
eeh_add_virt_device(edev, NULL);
} else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ if (!driver_eeh_aware)
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
pci_hp_add_devices(bus);
}
- } else if (frozen_bus && rmv_data->removed) {
- pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
- ssleep(5);
-
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
- eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- if (pe->type & EEH_PE_VF)
- eeh_add_virt_device(edev, NULL);
- else
- pci_hp_add_devices(frozen_bus);
}
eeh_pe_state_clear(pe, EEH_PE_KEEP);
@@ -733,28 +739,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
- * @pe: EEH PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
*
* Attempts to recover the given PE. If recovery fails or the PE has failed
* too many times, remove the PE.
*
- * Returns true if @pe should no longer be used, else false.
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
*/
-static bool eeh_handle_normal_event(struct eeh_pe *pe)
+void eeh_handle_normal_event(struct eeh_pe *pe)
{
- struct pci_bus *frozen_bus;
+ struct pci_bus *bus;
struct eeh_dev *edev, *tmp;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
- frozen_bus = eeh_pe_bus_get(pe);
- if (!frozen_bus) {
+ bus = eeh_pe_bus_get(pe);
+ if (!bus) {
pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
- return false;
+ return;
}
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
if (pe->freeze_count > eeh_max_freezes) {
@@ -806,7 +826,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
*/
if (result == PCI_ERS_RESULT_NONE) {
pr_info("EEH: Reset with hotplug activity\n");
- rc = eeh_reset_device(pe, frozen_bus, NULL);
+ rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
@@ -858,7 +878,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
- rc = eeh_reset_device(pe, NULL, &rmv_data);
+ rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
@@ -891,7 +911,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
pr_info("EEH: Notify device driver to resume\n");
eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
- return false;
+ goto final;
hard_fail:
/*
@@ -916,23 +936,21 @@ hard_fail:
* all removed devices correctly to avoid access
* the their PCI config any more.
*/
- if (frozen_bus) {
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
- pci_lock_rescan_remove();
- pci_hp_remove_devices(frozen_bus);
- pci_unlock_rescan_remove();
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- /* The passed PE should no longer be used */
- return true;
- }
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
}
- return false;
+final:
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
}
/**
@@ -942,7 +960,7 @@ hard_fail:
* specific PE. Iterates through possible failures and handles them as
* necessary.
*/
-static void eeh_handle_special_event(void)
+void eeh_handle_special_event(void)
{
struct eeh_pe *pe, *phb_pe;
struct pci_bus *bus;
@@ -1005,15 +1023,7 @@ static void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
- /*
- * eeh_handle_normal_event() can make the PE stale if it
- * determines that the PE cannot possibly be recovered.
- * Don't modify the PE state if that's the case.
- */
- if (eeh_handle_normal_event(pe))
- continue;
-
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
list_for_each_entry(hose, &hose_list, list_node) {
@@ -1049,28 +1059,3 @@ static void eeh_handle_special_event(void)
break;
} while (rc != EEH_NEXT_ERR_NONE);
}
-
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
- if (pe)
- eeh_handle_normal_event(pe);
- else
- eeh_handle_special_event();
-}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index accbf8b5fd46..61c9356bf9c9 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy)
/* We might have event without binding PE */
pe = event->pe;
if (pe) {
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
if (pe->type & EEH_PE_PHB)
pr_info("EEH: Detected error on PHB#%x\n",
pe->phb->global_number);
@@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy)
pr_info("EEH: Detected PCI bus error on "
"PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
- eeh_handle_event(pe);
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+ eeh_handle_normal_event(pe);
} else {
- eeh_handle_event(NULL);
+ eeh_handle_special_event();
}
kfree(event);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 2d4956e97aa9..ee5a67d57aab 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -807,7 +807,8 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev)
eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
/* PCI Command: 0x4 */
- eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1]);
+ eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
/* Check the PCIe link is ready */
eeh_bridge_check_link(edev);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2cb5109a7ea3..51695608c68b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -545,7 +545,7 @@ _GLOBAL(_switch)
/* Cancel all explict user streams as they will have no use after context
* switch and will stop the HW from creating streams itself
*/
- DCBT_STOP_ALL_STREAM_IDS(r6)
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
#endif
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1ecfd8ffb098..f283958129f2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -139,6 +139,21 @@ EXC_COMMON_BEGIN(system_reset_idle_common)
b pnv_powersave_wakeup
#endif
+/*
+ * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * the right thing. We do not want to reconcile because that goes
+ * through irq tracing which we don't want in NMI.
+ *
+ * Save PACAIRQHAPPENED because some code will do a hard disable
+ * (e.g., xmon). So we want to restore this back to where it was
+ * when we return. DAR is unused in the stack, so save it there.
+ */
+#define ADD_RECONCILE_NMI \
+ li r10,IRQS_ALL_DISABLED; \
+ stb r10,PACAIRQSOFTMASK(r13); \
+ lbz r10,PACAIRQHAPPENED(r13); \
+ std r10,_DAR(r1)
+
EXC_COMMON_BEGIN(system_reset_common)
/*
* Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
@@ -157,16 +172,56 @@ EXC_COMMON_BEGIN(system_reset_common)
subi r1,r1,INT_FRAME_SIZE
EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
system_reset, system_reset_exception,
- ADD_NVGPRS;ADD_RECONCILE)
+ ADD_NVGPRS;ADD_RECONCILE_NMI)
+
+ /* This (and MCE) can be simplified with mtmsrd L=1 */
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r0,MSR_RI
+ mfmsr r9
+ andc r9,r9,r0
+ mtmsrd r9,1
/*
- * The stack is no longer in use, decrement in_nmi.
+ * MSR_RI is clear, now we can decrement paca->in_nmi.
*/
lhz r10,PACA_IN_NMI(r13)
subi r10,r10,1
sth r10,PACA_IN_NMI(r13)
- b ret_from_except
+ /*
+ * Restore soft mask settings.
+ */
+ ld r10,_DAR(r1)
+ stb r10,PACAIRQHAPPENED(r13)
+ ld r10,SOFTE(r1)
+ stb r10,PACAIRQSOFTMASK(r13)
+
+ /*
+ * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
+ * Should share common bits...
+ */
+
+ /* Move original SRR0 and SRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ mtspr SPRN_SRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_SRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+ RFI_TO_USER_OR_KERNEL
#ifdef CONFIG_PPC_PSERIES
/*
@@ -621,7 +676,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
- beq- 8f /* if bad address, make full stack frame */
+ /*
+ * Large address, check whether we have to allocate new contexts.
+ */
+ beq- 8f
bne- cr5,2f /* if unrecoverable exception, oops */
@@ -629,14 +687,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
bne cr4,1f /* returning to kernel */
-.machine push
-.machine "power4"
mtcrf 0x80,r9
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -649,14 +704,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
RFI_TO_USER
b . /* prevent speculative execution */
1:
-.machine push
-.machine "power4"
mtcrf 0x80,r9
mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
mtcrf 0x02,r9 /* I/D indication is in cr6 */
mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
RESTORE_CTR(r9, PACA_EXSLB)
RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -685,7 +737,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
mr r3,r12
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10,bad_addr_slb)
+ LOAD_HANDLER(r10, large_addr_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
@@ -700,7 +752,7 @@ EXC_COMMON_BEGIN(unrecov_slb)
bl unrecoverable_exception
b 1b
-EXC_COMMON_BEGIN(bad_addr_slb)
+EXC_COMMON_BEGIN(large_addr_slb)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
ld r3, PACA_EXSLB+EX_DAR(r13)
@@ -710,7 +762,7 @@ EXC_COMMON_BEGIN(bad_addr_slb)
std r10, _TRAP(r1)
2: bl save_nvgprs
addi r3, r1, STACK_FRAME_OVERHEAD
- bl slb_miss_bad_addr
+ bl slb_miss_large_addr
b ret_from_except
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -833,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
TRAMP_KVM(PACA_EXGEN, 0x900)
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -909,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
mtctr r13; \
GET_PACA(r13); \
std r10,PACA_EXGEN+EX_R10(r13); \
+ INTERRUPT_TO_KERNEL; \
KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
HMT_MEDIUM; \
mfctr r9;
@@ -917,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
#define SYSCALL_KVMTEST \
HMT_MEDIUM; \
mr r9,r13; \
- GET_PACA(r13);
+ GET_PACA(r13); \
+ INTERRUPT_TO_KERNEL;
#endif
#define LOAD_SYSCALL_HANDLER(reg) \
@@ -1273,7 +1327,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
bne+ denorm_assist
#endif
- KVMTEST_PR(0x1500)
+ KVMTEST_HV(0x1500)
EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
@@ -1285,7 +1339,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500)
+TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1455,6 +1509,19 @@ masked_##_H##interrupt: \
b .; \
MASKED_DEC_HANDLER(_H)
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ sync
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ori 31,31,0
+ .rept 14
+ b 1f
+1:
+ .endr
+ blr
+
TRAMP_REAL_BEGIN(rfi_flush_fallback)
SET_SCRATCH0(r13);
GET_PACA(r13);
@@ -1466,7 +1533,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
- DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
@@ -1506,7 +1573,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
- DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index a61151a6ea5e..6eca15f25c73 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -392,19 +392,20 @@ generic_secondary_common_init:
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
- LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
- ld r13,0(r13) /* Get base vaddr of paca array */
#ifndef CONFIG_SMP
- addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
b kexec_wait /* wait for next kernel if !SMP */
#else
+ LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointer */
+ ld r8,0(r8) /* Get base vaddr of array */
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
lwz r7,0(r7) /* also the max paca allocated */
li r5,0 /* logical cpu id */
-1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
+1:
+ sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r9,r8 /* r13 = paca_ptrs[cpu id] */
+ lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
beq 2f
- addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
addi r5,r5,1
cmpw r5,r7 /* Check if more pacas exist */
blt 1b
@@ -756,10 +757,10 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
- LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
- ld r4,0(r4) /* Get base vaddr of paca array */
- mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
- add r13,r13,r4 /* for this processor. */
+ LOAD_REG_ADDR(r4,paca_ptrs) /* Load paca pointer */
+ ld r4,0(r4) /* Get base vaddr of paca_ptrs array */
+ sldi r5,r24,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r5,r4 /* r13 = paca_ptrs[cpu id] */
SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
/* Mark interrupts soft and hard disabled (they might be enabled
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 53b9c1dfd7d9..4c1012b80d3b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -33,6 +33,7 @@
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
+#include <asm/debug.h>
#include <linux/uaccess.h>
/*
@@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
* HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
* 'symbolsize' should satisfy the check below.
*/
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
length_max = 8; /* DABR */
if (cpu_has_feature(CPU_FTR_DAWR)) {
length_max = 512 ; /* 64 doublewords */
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 01e1c1997893..e734f6e45abc 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -325,12 +325,6 @@ enter_winkle:
* r3 - PSSCR value corresponding to the requested stop state.
*/
power_enter_stop:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- /* DO THIS IN REAL MODE! See comment above. */
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
/*
* Check if we are executing the lite variant with ESL=EC=0
*/
@@ -339,6 +333,7 @@ power_enter_stop:
bne .Lhandle_esl_ec_set
PPC_STOP
li r3,0 /* Since we didn't lose state, return 0 */
+ std r3, PACA_REQ_PSSCR(r13)
/*
* pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
@@ -427,13 +422,49 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
/*
* Entered with MSR[EE]=0 and no soft-masked interrupts pending.
* r3 contains desired PSSCR register value.
+ *
+ * Offline (CPU unplug) case also must notify KVM that the CPU is
+ * idle.
*/
+_GLOBAL(power9_offline_stop)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ */
+ li r4,KVM_HWTHREAD_IN_IDLE
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+ /* fall through */
+
_GLOBAL(power9_idle_stop)
std r3, PACA_REQ_PSSCR(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+ sync
+ lwz r5, PACA_DONT_STOP(r13)
+ cmpwi r5, 0
+ bne 1f
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+#endif
mtspr SPRN_PSSCR,r3
LOAD_REG_ADDR(r4,power_enter_stop)
b pnv_powersave_common
/* No return */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+1:
+ /*
+ * We get here when TM / thread reconfiguration bug workaround
+ * code wants to get the CPU into SMT4 mode, and therefore
+ * we are being asked not to stop.
+ */
+ li r3, 0
+ std r3, PACA_REQ_PSSCR(r13)
+ blr /* return 0 for wakeup cause / SRR1 value */
+#endif
/*
* On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
@@ -520,11 +551,14 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mr r3,r12
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ lbz r0,HSTATE_HWTHREAD_STATE(r13)
+ cmpwi r0,KVM_HWTHREAD_IN_KERNEL
+ beq 0f
li r0,KVM_HWTHREAD_IN_KERNEL
stb r0,HSTATE_HWTHREAD_STATE(r13)
/* Order setting hwthread_state vs. testing hwthread_req */
sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
+0: lbz r0,HSTATE_HWTHREAD_REQ(r13)
cmpwi r0,0
beq 1f
b kvm_start_guest
@@ -584,6 +618,8 @@ FTR_SECTION_ELSE_NESTED(71)
mfspr r5, SPRN_PSSCR
rldicl r5,r5,4,60
ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
+ li r0, 0 /* clear requested_psscr to say we're awake */
+ std r0, PACA_REQ_PSSCR(r13)
cmpd cr4,r5,r4
bge cr4,pnv_wakeup_tb_loss /* returns to caller */
@@ -834,6 +870,8 @@ BEGIN_FTR_SECTION
mtspr SPRN_PTCR,r4
ld r4,_RPR(r1)
mtspr SPRN_RPR,r4
+ ld r4,_AMOR(r1)
+ mtspr SPRN_AMOR,r4
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4,_TSCR(r1)
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index aab456ed2a00..5ac84efc6ede 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -45,12 +45,32 @@ u64 ioread64(void __iomem *addr)
{
return readq(addr);
}
+u64 ioread64_lo_hi(void __iomem *addr)
+{
+ return readq(addr);
+}
+u64 ioread64_hi_lo(void __iomem *addr)
+{
+ return readq(addr);
+}
u64 ioread64be(void __iomem *addr)
{
return readq_be(addr);
}
+u64 ioread64be_lo_hi(void __iomem *addr)
+{
+ return readq_be(addr);
+}
+u64 ioread64be_hi_lo(void __iomem *addr)
+{
+ return readq_be(addr);
+}
EXPORT_SYMBOL(ioread64);
+EXPORT_SYMBOL(ioread64_lo_hi);
+EXPORT_SYMBOL(ioread64_hi_lo);
EXPORT_SYMBOL(ioread64be);
+EXPORT_SYMBOL(ioread64be_lo_hi);
+EXPORT_SYMBOL(ioread64be_hi_lo);
#endif /* __powerpc64__ */
void iowrite8(u8 val, void __iomem *addr)
@@ -83,12 +103,32 @@ void iowrite64(u64 val, void __iomem *addr)
{
writeq(val, addr);
}
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+ writeq(val, addr);
+}
+void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+ writeq(val, addr);
+}
void iowrite64be(u64 val, void __iomem *addr)
{
writeq_be(val, addr);
}
+void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+ writeq_be(val, addr);
+}
+void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+ writeq_be(val, addr);
+}
EXPORT_SYMBOL(iowrite64);
+EXPORT_SYMBOL(iowrite64_lo_hi);
+EXPORT_SYMBOL(iowrite64_hi_lo);
EXPORT_SYMBOL(iowrite64be);
+EXPORT_SYMBOL(iowrite64be_lo_hi);
+EXPORT_SYMBOL(iowrite64be_hi_lo);
#endif /* __powerpc64__ */
/*
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index 9a42309b091a..ba4f18a43ee8 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -572,7 +572,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
{
int ret;
unsigned int fdt_size;
- unsigned long kernel_load_addr, purgatory_load_addr;
+ unsigned long kernel_load_addr;
unsigned long initrd_load_addr = 0, fdt_load_addr;
void *fdt;
const void *slave_code;
@@ -580,6 +580,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
struct elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size };
+ struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size, .top_down = true };
ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
if (ret)
@@ -591,14 +593,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
- ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true,
- &purgatory_load_addr);
+ ret = kexec_load_purgatory(image, &pbuf);
if (ret) {
pr_err("Loading purgatory failed.\n");
goto out;
}
- pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+ pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
if (initrd != NULL) {
kbuf.buffer = initrd;
@@ -657,7 +658,7 @@ out:
return ret ? ERR_PTR(ret) : fdt;
}
-struct kexec_file_ops kexec_elf64_ops = {
+const struct kexec_file_ops kexec_elf64_ops = {
.probe = elf64_probe,
.load = elf64_load,
};
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index ca5d5a081e75..e4c5bf33970b 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -455,29 +455,33 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
}
kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->nip = orig_ret_address;
+
/*
- * Make LR point to the orig_ret_address.
- * When the 'nop' inside the kretprobe_trampoline
- * is optimized, we can do a 'blr' after executing the
- * detour buffer code.
+ * We get here through one of two paths:
+ * 1. by taking a trap -> kprobe_handler() -> here
+ * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here
+ *
+ * When going back through (1), we need regs->nip to be set up properly
+ * as it is used to determine the return address from the trap.
+ * For (2), since nip is not honoured with optprobes, we instead set up
+ * the link register properly so that the subsequent 'blr' in
+ * kretprobe_trampoline jumps back to the right instruction.
+ *
+ * For nip, we should set the address to the previous instruction since
+ * we end up emulating it in kprobe_handler(), which increments the nip
+ * again.
*/
+ regs->nip = orig_ret_address - 4;
regs->link = orig_ret_address;
- reset_current_kprobe();
kretprobe_hash_unlock(current, &flags);
- preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
hlist_del(&ri->hlist);
kfree(ri);
}
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
+
+ return 0;
}
NOKPROBE_SYMBOL(trampoline_probe_handler);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 49d34d7271e7..1044bf15d5ed 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -168,24 +168,25 @@ static void kexec_prepare_cpus_wait(int wait_state)
* are correctly onlined. If somehow we start a CPU on boot with RTAS
* start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
* time, the boot CPU will timeout. If it does eventually execute
- * stuff, the secondary will start up (paca[].cpu_start was written) and
- * get into a peculiar state. If the platform supports
- * smp_ops->take_timebase(), the secondary CPU will probably be spinning
- * in there. If not (i.e. pseries), the secondary will continue on and
- * try to online itself/idle/etc. If it survives that, we need to find
- * these possible-but-not-online-but-should-be CPUs and chaperone them
- * into kexec_smp_wait().
+ * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+ * written) and get into a peculiar state.
+ * If the platform supports smp_ops->take_timebase(), the secondary CPU
+ * will probably be spinning in there. If not (i.e. pseries), the
+ * secondary will continue on and try to online itself/idle/etc. If it
+ * survives that, we need to find these
+ * possible-but-not-online-but-should-be CPUs and chaperone them into
+ * kexec_smp_wait().
*/
for_each_online_cpu(i) {
if (i == my_cpu)
continue;
- while (paca[i].kexec_state < wait_state) {
+ while (paca_ptrs[i]->kexec_state < wait_state) {
barrier();
if (i != notified) {
printk(KERN_INFO "kexec: waiting for cpu %d "
"(physical %d) to enter %i state\n",
- i, paca[i].hw_cpu_id, wait_state);
+ i, paca_ptrs[i]->hw_cpu_id, wait_state);
notified = i;
}
}
@@ -322,18 +323,24 @@ void default_machine_kexec(struct kimage *image)
kexec_stack.thread_info.cpu = current_thread_info()->cpu;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
- * it. Also poison per_cpu_offset to catch anyone using non-static
- * data.
+ * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+ * non-static data.
*/
memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
- paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
- kexec_paca.paca_index;
+#ifdef CONFIG_PPC_PSERIES
+ kexec_paca.lppaca_ptr = NULL;
+#endif
+ paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
setup_paca(&kexec_paca);
- /* XXX: If anyone does 'dynamic lppacas' this will also need to be
- * switched to a static version!
+ /*
+ * The lppaca should be unregistered at this point so the HV won't
+ * touch it. In the case of a crash, none of the lppacas are
+ * unregistered so there is not much we can do about it here.
*/
+
/*
* On Book3S, the copy must happen with the MMU off if we are either
* using Radix page tables or we are not in an LPAR since we can
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
index e4395f937d63..0bd23dc789a4 100644
--- a/arch/powerpc/kernel/machine_kexec_file_64.c
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -31,52 +31,19 @@
#define SLAVE_CODE_SIZE 256
-static struct kexec_file_ops *kexec_file_loaders[] = {
+const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_elf64_ops,
+ NULL
};
int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
unsigned long buf_len)
{
- int i, ret = -ENOEXEC;
- struct kexec_file_ops *fops;
-
/* We don't support crash kernels yet. */
if (image->type == KEXEC_TYPE_CRASH)
- return -ENOTSUPP;
-
- for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
- fops = kexec_file_loaders[i];
- if (!fops || !fops->probe)
- continue;
-
- ret = fops->probe(buf, buf_len);
- if (!ret) {
- image->fops = fops;
- return ret;
- }
- }
-
- return ret;
-}
-
-void *arch_kexec_kernel_image_load(struct kimage *image)
-{
- if (!image->fops || !image->fops->load)
- return ERR_PTR(-ENOEXEC);
-
- return image->fops->load(image, image->kernel_buf,
- image->kernel_buf_len, image->initrd_buf,
- image->initrd_buf_len, image->cmdline_buf,
- image->cmdline_buf_len);
-}
-
-int arch_kimage_file_post_load_cleanup(struct kimage *image)
-{
- if (!image->fops || !image->fops->cleanup)
- return 0;
+ return -EOPNOTSUPP;
- return image->fops->cleanup(image->image_loader_data);
+ return kexec_image_probe_default(image, buf, buf_len);
}
/**
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index fe6fc63251fe..38c5b4764bfe 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -441,7 +441,6 @@ static int mce_handle_ierror(struct pt_regs *regs,
if (pfn != ULONG_MAX) {
*phys_addr =
(pfn << PAGE_SHIFT);
- handled = 1;
}
}
}
@@ -532,9 +531,7 @@ static int mce_handle_derror(struct pt_regs *regs,
* kernel/exception-64s.h
*/
if (get_paca()->in_mce < MAX_MCE_DEPTH)
- if (!mce_find_instr_ea_and_pfn(regs, addr,
- phys_addr))
- handled = 1;
+ mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
}
found = 1;
}
@@ -572,7 +569,7 @@ static long mce_handle_error(struct pt_regs *regs,
const struct mce_ierror_table itable[])
{
struct mce_error_info mce_err = { 0 };
- uint64_t addr, phys_addr;
+ uint64_t addr, phys_addr = ULONG_MAX;
uint64_t srr1 = regs->msr;
long handled;
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 3280953a82cf..fa267e94090a 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range)
blr
EXPORT_SYMBOL(flush_dcache_range)
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of dcache block size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mfmsr r5 /* Disable MMU Data Relocation */
- ori r0,r5,MSR_DR
- xori r0,r0,MSR_DR
- sync
- mtmsr r0
- sync
- isync
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- mtmsr r5 /* Re-enable MMU Data Relocation */
- sync
- isync
- blr
-
_GLOBAL(flush_inval_dcache_range)
ld r10,PPC64_CACHES@toc(r2)
lwz r7,DCACHEL1BLOCKSIZE(r10) /* Get dcache block size */
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 496d6393bd41..ba681dac7b46 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part,
tmp_index = part->index;
- rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info),
- &tmp_index);
+ rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index);
if (rc <= 0) {
pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
return rc;
@@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff,
tmp_index = part->index;
if (part->os_partition) {
- rc = ppc_md.nvram_read((char *)&info,
- sizeof(struct err_log_info),
- &tmp_index);
+ rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index);
if (rc <= 0) {
pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
return rc;
@@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void)
"detected: 0-length partition\n");
goto out;
}
- tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL);
err = -ENOMEM;
if (!tmp_part) {
printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 95ffedf14885..0ee3e6d50f28 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -20,116 +20,105 @@
#include "setup.h"
-#ifdef CONFIG_PPC_BOOK3S
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ unsigned long pa;
+ int nid;
+
+ /*
+ * boot_cpuid paca is allocated very early before cpu_to_node is up.
+ * Set bottom-up mode, because the boot CPU should be on node-0,
+ * which will put its paca in the right place.
+ */
+ if (cpu == boot_cpuid) {
+ nid = -1;
+ memblock_set_bottom_up(true);
+ } else {
+ nid = early_cpu_to_node(cpu);
+ }
+
+ pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
+ if (!pa) {
+ pa = memblock_alloc_base(size, align, limit);
+ if (!pa)
+ panic("cannot allocate paca data");
+ }
+
+ if (cpu == boot_cpuid)
+ memblock_set_bottom_up(false);
+
+ return __va(pa);
+}
+
+#ifdef CONFIG_PPC_PSERIES
/*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary. The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary. The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must be 1kB in size, L1 cache line aligned,
+ * and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy
+ * these requirements.
*/
-struct lppaca lppaca[] = {
- [0 ... (NR_LPPACAS-1)] = {
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+ BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+ *lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(sizeof(struct lppaca)),
+ .size = cpu_to_be16(0x400),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
- .page_ins = 0,
- },
+ .page_ins = 0, };
};
-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
-{
- if (nr_cpus <= NR_LPPACAS)
- return;
-
- lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
- (nr_cpus - NR_LPPACAS));
- extra_lppacas = __va(memblock_alloc_base(lppaca_size,
- PAGE_SIZE, limit));
-}
-
-static struct lppaca * __init new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
+ size_t size = 0x400;
- if (cpu < NR_LPPACAS)
- return &lppaca[cpu];
+ BUILD_BUG_ON(size < sizeof(struct lppaca));
+
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ return NULL;
- lp = extra_lppacas + (cpu - NR_LPPACAS);
- *lp = lppaca[0];
+ lp = alloc_paca_data(size, 0x400, limit, cpu);
+ init_lppaca(lp);
return lp;
}
-
-static void __init free_lppacas(void)
-{
- long new_size = 0, nr;
-
- if (!lppaca_size)
- return;
- nr = num_possible_cpus() - NR_LPPACAS;
- if (nr > 0)
- new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
- if (new_size >= lppaca_size)
- return;
-
- memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
- lppaca_size = new_size;
-}
-
-#else
-
-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
-
#endif /* CONFIG_PPC_BOOK3S */
#ifdef CONFIG_PPC_BOOK3S_64
/*
- * 3 persistent SLBs are registered here. The buffer will be zero
+ * 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
*
* If you make the number of persistent SLB entries dynamic, please also
* update PR KVM to flush and restore them accordingly.
*/
-static struct slb_shadow * __initdata slb_shadow;
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit)
-{
- int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
-
- if (early_radix_enabled())
- return;
-
- slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
- memset(slb_shadow, 0, size);
-}
-
-static struct slb_shadow * __init init_slb_shadow(int cpu)
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
{
struct slb_shadow *s;
- if (early_radix_enabled())
- return NULL;
-
- s = &slb_shadow[cpu];
+ if (cpu != boot_cpuid) {
+ /*
+ * Boot CPU comes here before early_radix_enabled
+ * is parsed (e.g., for disable_radix). So allocate
+ * always and this will be fixed up in free_unused_pacas.
+ */
+ if (early_radix_enabled())
+ return NULL;
+ }
- /*
- * When we come through here to initialise boot_paca, the slb_shadow
- * buffers are not allocated yet. That's OK, we'll get one later in
- * boot, but make sure we don't corrupt memory at 0.
- */
- if (!slb_shadow)
- return NULL;
+ s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
+ memset(s, 0, sizeof(*s));
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -137,10 +126,6 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
return s;
}
-#else /* !CONFIG_PPC_BOOK3S_64 */
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-
#endif /* CONFIG_PPC_BOOK3S_64 */
/* The Paca is an array with one entry per processor. Each contains an
@@ -152,14 +137,15 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
* processors. The processor VPD array needs one entry per physical
* processor (not thread).
*/
-struct paca_struct *paca;
-EXPORT_SYMBOL(paca);
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);
void __init initialise_paca(struct paca_struct *new_paca, int cpu)
{
-#ifdef CONFIG_PPC_BOOK3S
- new_paca->lppaca_ptr = new_lppaca(cpu);
-#else
+#ifdef CONFIG_PPC_PSERIES
+ new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E
new_paca->kernel_pgd = swapper_pg_dir;
#endif
new_paca->lock_token = 0x8000;
@@ -173,7 +159,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
#ifdef CONFIG_PPC_BOOK3S_64
- new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
+ new_paca->slb_shadow_ptr = NULL;
#endif
#ifdef CONFIG_PPC_BOOK3E
@@ -203,12 +189,25 @@ void setup_paca(struct paca_struct *new_paca)
}
-static int __initdata paca_size;
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;
+
+/*
+ * Allocate the array of per-CPU paca pointers, one slot per nr_cpu_ids,
+ * and poison it with 0x88 bytes. The array size and the CPU count it was
+ * sized for are remembered in paca_ptrs_size and paca_nr_cpu_ids.
+ */
+void __init allocate_paca_ptrs(void)
+{
+	int bytes = sizeof(struct paca_struct *) * nr_cpu_ids;
+	struct paca_struct **ptrs = __va(memblock_alloc(bytes, 0));
+
+	/* Poison every slot before any real pointer is stored in it. */
+	memset(ptrs, 0x88, bytes);
+
+	paca_ptrs = ptrs;
+	paca_ptrs_size = bytes;
+	paca_nr_cpu_ids = nr_cpu_ids;
+}
-void __init allocate_pacas(void)
+void __init allocate_paca(int cpu)
{
u64 limit;
- int cpu;
+ struct paca_struct *paca;
+
+ BUG_ON(cpu >= paca_nr_cpu_ids);
#ifdef CONFIG_PPC_BOOK3S_64
/*
@@ -220,40 +219,44 @@ void __init allocate_pacas(void)
limit = ppc64_rma_size;
#endif
- paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-
- paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
- memset(paca, 0, paca_size);
-
- printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n",
- paca_size, nr_cpu_ids, paca);
-
- allocate_lppacas(nr_cpu_ids, limit);
-
- allocate_slb_shadows(nr_cpu_ids, limit);
+ paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+ limit, cpu);
+ paca_ptrs[cpu] = paca;
+ memset(paca, 0, sizeof(struct paca_struct));
- /* Can't use for_each_*_cpu, as they aren't functional yet */
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- initialise_paca(&paca[cpu], cpu);
+ initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+ paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+ paca_struct_size += sizeof(struct paca_struct);
}
void __init free_unused_pacas(void)
{
- int new_size;
-
- new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+ int new_ptrs_size;
- if (new_size >= paca_size)
- return;
+ new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ if (new_ptrs_size < paca_ptrs_size)
+ memblock_free(__pa(paca_ptrs) + new_ptrs_size,
+ paca_ptrs_size - new_ptrs_size);
- memblock_free(__pa(paca) + new_size, paca_size - new_size);
+ paca_nr_cpu_ids = nr_cpu_ids;
+ paca_ptrs_size = new_ptrs_size;
- printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
- paca_size - new_size);
-
- paca_size = new_size;
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (early_radix_enabled()) {
+ /* Ugly fixup, see new_slb_shadow() */
+ memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+ sizeof(struct slb_shadow));
+ paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+ }
+#endif
- free_lppacas();
+ printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+ paca_ptrs_size + paca_struct_size, nr_cpu_ids);
}
void copy_mm_to_paca(struct mm_struct *mm)
@@ -265,7 +268,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm->context.slb_addr_limit);
get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+ memcpy(&get_paca()->mm_ctx_low_slices_psize,
+ &context->low_slices_psize, sizeof(context->low_slices_psize));
memcpy(&get_paca()->mm_ctx_high_slices_psize,
&context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
#else /* CONFIG_PPC_MM_SLICES */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 446c79611d56..fe9733ffffaa 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -410,72 +410,22 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
}
/*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
* -- paulus.
*/
-
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap. They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
- resource_size_t *offset,
- enum pci_mmap_state mmap_state)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- unsigned long io_offset = 0;
- int i, res_bit;
-
- if (hose == NULL)
- return NULL; /* should never happen */
-
- /* If memory, add on the PCI bridge address offset */
- if (mmap_state == pci_mmap_mem) {
-#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
- *offset += hose->pci_mem_offset;
-#endif
- res_bit = IORESOURCE_MEM;
- } else {
- io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
- *offset += io_offset;
- res_bit = IORESOURCE_IO;
- }
-
- /*
- * Check that the offset requested corresponds to one of the
- * resources of the device.
- */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &dev->resource[i];
- int flags = rp->flags;
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ resource_size_t ioaddr = pci_resource_start(pdev, bar);
- /* treat ROM as memory (should be already) */
- if (i == PCI_ROM_RESOURCE)
- flags |= IORESOURCE_MEM;
-
- /* Active and same type? */
- if ((flags & res_bit) == 0)
- continue;
-
- /* In the range of this resource? */
- if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
- continue;
+ if (!hose)
+ return -EINVAL;
- /* found it! construct the final physical address */
- if (mmap_state == pci_mmap_io)
- *offset += hose->io_base_phys - io_offset;
- return rp;
- }
+ /* Convert to an offset within this PCI controller */
+ ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
- return NULL;
+ vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+ return 0;
}
/*
@@ -527,42 +477,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
return prot;
}
-
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture. The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, int bar,
- struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- resource_size_t offset =
- ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
- struct resource *rp;
- int ret;
-
- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
-
- vma->vm_pgoff = offset >> PAGE_SHIFT;
- if (write_combine)
- vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
- else
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
- return ret;
-}
-
/* This provides legacy IO read access on a bus */
int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
{
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1738c4127b32..26ea9793d290 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits)
EXPORT_SYMBOL(__msr_check_and_clear);
#ifdef CONFIG_PPC_FPU
-void __giveup_fpu(struct task_struct *tsk)
+static void __giveup_fpu(struct task_struct *tsk)
{
unsigned long msr;
@@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs)
regs->msr = msr;
}
-void save_all(struct task_struct *tsk)
+static void save_all(struct task_struct *tsk)
{
unsigned long usermsr;
@@ -632,6 +632,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
hw_breakpoint_disable();
/* Deliver the signal to userspace */
+ clear_siginfo(&info);
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_HWBKPT;
@@ -718,7 +719,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
{
thread->hw_brk.address = 0;
thread->hw_brk.type = 0;
- set_breakpoint(&thread->hw_brk);
+ if (ppc_breakpoint_available())
+ set_breakpoint(&thread->hw_brk);
}
#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -815,9 +817,14 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
if (cpu_has_feature(CPU_FTR_DAWR))
+ // Power8 or later
set_dawr(brk);
- else
+ else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ // Power7 or earlier
set_dabr(brk);
+ else
+ // Shouldn't happen due to higher level checks
+ WARN_ON_ONCE(1);
}
void set_breakpoint(struct arch_hw_breakpoint *brk)
@@ -827,6 +834,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk)
preempt_enable();
}
+/*
+ * Check if we have DAWR or DABR hardware.
+ *
+ * - CPU_FTR_DAWR set: DAWR is usable (POWER8).
+ * - CPU_FTR_DAWR clear but CPU_FTR_ARCH_207S set: POWER9 with DAWR
+ *   disabled, so nothing is available.
+ * - Otherwise: DABR, i.e. everything but POWER8 and POWER9.
+ */
+bool ppc_breakpoint_available(void)
+{
+	return cpu_has_feature(CPU_FTR_DAWR) ||
+	       !cpu_has_feature(CPU_FTR_ARCH_207S);
+}
+EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+
#ifdef CONFIG_PPC64
DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
#endif
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4dffef947b8a..9dbed488aba1 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node)
static void __init check_cpu_feature_properties(unsigned long node)
{
- unsigned long i;
+ int i;
struct feature_property *fp = feature_properties;
const __be32 *prop;
- for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+ for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) {
prop = of_get_flat_dt_prop(node, fp->name, NULL);
if (prop && be32_to_cpup(prop) >= fp->min_value) {
cur_cpu_spec->cpu_features |= fp->cpu_feature;
@@ -365,7 +365,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
DBG("boot cpu: logical %d physical %d\n", found,
be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
- set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
/*
* PAPR defines "logical" PVR values for cpus that
@@ -403,7 +402,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
else if (!dt_cpu_ftrs_in_use())
cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+ allocate_paca(boot_cpuid);
#endif
+ set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
return 0;
}
@@ -744,7 +745,7 @@ void __init early_init_devtree(void *params)
* FIXME .. and the initrd too? */
move_device_tree();
- allocate_pacas();
+ allocate_paca_ptrs();
DBG("Scanning CPUs ...\n");
@@ -874,5 +875,15 @@ EXPORT_SYMBOL(cpu_to_chip_id);
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
+#ifdef CONFIG_SMP
+ /*
+ * Early firmware scanning must use this rather than
+ * get_hard_smp_processor_id because we don't have pacas allocated
+ * until memory topology is discovered.
+ */
+ if (cpu_to_phys_id != NULL)
+ return (int)phys_id == cpu_to_phys_id[cpu];
+#endif
+
return (int)phys_id == get_hard_smp_processor_id(cpu);
}
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index acf4b2e0530c..f9d6befb55a6 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -171,7 +171,7 @@ static unsigned long __initdata prom_tce_alloc_start;
static unsigned long __initdata prom_tce_alloc_end;
#endif
-static bool __initdata prom_radix_disable;
+static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
struct platform_support {
bool hash_mmu;
@@ -641,9 +641,19 @@ static void __init early_cmdline_parse(void)
opt = strstr(prom_cmd_line, "disable_radix");
if (opt) {
- prom_debug("Radix disabled from cmdline\n");
- prom_radix_disable = true;
+ opt += 13;
+ if (*opt && *opt == '=') {
+ bool val;
+
+ if (kstrtobool(++opt, &val))
+ prom_radix_disable = false;
+ else
+ prom_radix_disable = val;
+ } else
+ prom_radix_disable = true;
}
+ if (prom_radix_disable)
+ prom_debug("Radix disabled from cmdline\n");
}
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -1110,7 +1120,8 @@ static void __init prom_check_platform_support(void)
}
}
- if (supported.radix_mmu && supported.radix_gtse) {
+ if (supported.radix_mmu && supported.radix_gtse &&
+ IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
/* Radix preferred - but we require GTSE for now */
prom_debug("Asking for radix with GTSE\n");
ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
@@ -1809,16 +1820,8 @@ static void __init prom_initialize_tce_table(void)
* size to 4 MB. This is enough to map 2GB of PCI DMA space.
* By doing this, we avoid the pitfalls of trying to DMA to
* MMIO space and the DMA alias hole.
- *
- * On POWER4, firmware sets the TCE region by assuming
- * each TCE table is 8MB. Using this memory for anything
- * else will impact performance, so we always allocate 8MB.
- * Anton
*/
- if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p))
- minsize = 8UL << 20;
- else
- minsize = 4UL << 20;
+ minsize = 4UL << 20;
/* Align to the greater of the align or size */
align = max(minalign, minsize);
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 12640f7e726b..acb6b9226352 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -19,7 +19,7 @@
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
_end enter_prom memcpy memset reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index ca72d7391d40..d23cf632edf0 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -41,6 +41,7 @@
#include <asm/switch_to.h>
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
+#include <asm/debug.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
struct perf_event_attr attr;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#ifndef CONFIG_PPC_ADV_DEBUG_REGS
+ bool set_bp = true;
struct arch_hw_breakpoint hw_brk;
#endif
@@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
hw_brk.address = data & (~HW_BRK_TYPE_DABR);
hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
hw_brk.len = 8;
+ set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
bp = thread->ptrace_bps[0];
- if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
+ if (!set_bp) {
if (bp) {
unregister_hw_breakpoint(bp);
thread->ptrace_bps[0] = NULL;
@@ -2450,6 +2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
return PTR_ERR(bp);
}
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+ if (set_bp && (!ppc_breakpoint_available()))
+ return -ENODEV;
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
task->thread.hw_brk = hw_brk;
#else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
if (child->thread.hw_brk.address)
return -ENOSPC;
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
child->thread.hw_brk = brk;
return 1;
@@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request,
#endif
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
dbginfo.num_instruction_bps = 0;
- dbginfo.num_data_bps = 1;
+ if (ppc_breakpoint_available())
+ dbginfo.num_data_bps = 1;
+ else
+ dbginfo.num_data_bps = 0;
dbginfo.num_condition_regs = 0;
#ifdef CONFIG_PPC64
dbginfo.data_bp_alignment = 8;
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index fb070d8cad07..d49063d0baa4 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -154,18 +154,6 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file,
static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
-static int sensors_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ppc_rtas_sensors_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_sensors_operations = {
- .open = sensors_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int poweron_open(struct inode *inode, struct file *file)
{
return single_open(file, ppc_rtas_poweron_show, NULL);
@@ -231,18 +219,6 @@ static const struct file_operations ppc_rtas_tone_volume_operations = {
.release = single_release,
};
-static int rmo_buf_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ppc_rtas_rmo_buf_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_rmo_buf_ops = {
- .open = rmo_buf_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int ppc_rtas_find_all_sensors(void);
static void ppc_rtas_process_sensor(struct seq_file *m,
struct individual_sensor *s, int state, int error, const char *loc);
@@ -267,14 +243,14 @@ static int __init proc_rtas_init(void)
&ppc_rtas_clock_operations);
proc_create("powerpc/rtas/poweron", 0644, NULL,
&ppc_rtas_poweron_operations);
- proc_create("powerpc/rtas/sensors", 0444, NULL,
- &ppc_rtas_sensors_operations);
+ proc_create_single("powerpc/rtas/sensors", 0444, NULL,
+ ppc_rtas_sensors_show);
proc_create("powerpc/rtas/frequency", 0644, NULL,
&ppc_rtas_tone_freq_operations);
proc_create("powerpc/rtas/volume", 0644, NULL,
&ppc_rtas_tone_volume_operations);
- proc_create("powerpc/rtas/rmo_buffer", 0400, NULL,
- &ppc_rtas_rmo_buf_ops);
+ proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
+ ppc_rtas_rmo_buf_show);
return 0;
}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644
index 000000000000..b98a722da915
--- /dev/null
+++ b/arch/powerpc/kernel/security.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/seq_buf.h>
+
+#include <asm/debugfs.h>
+#include <asm/security_features.h>
+
+
+unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+
+/*
+ * Device attribute show callback: write the Meltdown status line into buf
+ * and return the number of bytes written.
+ *
+ * If the RFI flush is enabled and/or SEC_FTR_L1D_THREAD_PRIV is set, the
+ * active mitigations are listed. Otherwise the result is "Vulnerable" when
+ * either L1D flush feature flag is set, and "Not affected" when neither is.
+ */
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+	struct seq_buf s;
+
+	if (!rfi_flush && !thread_priv) {
+		/* No mitigation in effect: report raw vulnerability state. */
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) ||
+		    security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+			return sprintf(buf, "Vulnerable\n");
+
+		return sprintf(buf, "Not affected\n");
+	}
+
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+	seq_buf_printf(&s, "Mitigation: ");
+
+	if (rfi_flush) {
+		seq_buf_printf(&s, "RFI Flush");
+		/* Separator only when a second item follows. */
+		if (thread_priv)
+			seq_buf_printf(&s, ", ");
+	}
+
+	if (thread_priv)
+		seq_buf_printf(&s, "L1D private per thread");
+
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}
+
+/*
+ * Device attribute show callback for Spectre v1: "Vulnerable" when
+ * SEC_FTR_BNDS_CHK_SPEC_BAR is set, "Not affected" otherwise.
+ * Returns the number of bytes written to buf.
+ */
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
+		return sprintf(buf, "Vulnerable\n");
+
+	return sprintf(buf, "Not affected\n");
+}
+
+/*
+ * Device attribute show callback for Spectre v2.
+ *
+ * Lists the mitigations implied by SEC_FTR_BCCTRL_SERIALISED and
+ * SEC_FTR_COUNT_CACHE_DISABLED, or "Vulnerable" when neither is set, and
+ * appends a note when SEC_FTR_SPEC_BAR_ORI31 is set. Returns the number
+ * of bytes written to buf.
+ */
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool serialised = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+	bool cache_off = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+	bool ori_barrier = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31);
+	struct seq_buf s;
+
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+	if (!serialised && !cache_off) {
+		seq_buf_printf(&s, "Vulnerable");
+	} else {
+		seq_buf_printf(&s, "Mitigation: ");
+
+		if (serialised) {
+			seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+			/* Separator only when a second item follows. */
+			if (cache_off)
+				seq_buf_printf(&s, ", ");
+		}
+
+		if (cache_off)
+			seq_buf_printf(&s, "Indirect branch cache disabled");
+	}
+
+	if (ori_barrier)
+		seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}
+
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+bool stf_barrier;
+
+/*
+ * Handler for the "no_stf_barrier" early parameter: log that the
+ * store-forwarding barrier was disabled on the command line and set
+ * no_stf_barrier so setup_stf_barrier() leaves the barrier off.
+ *
+ * @p is unused; other handlers in this file call this with NULL.
+ * Always returns 0.
+ */
+static int __init handle_no_stf_barrier(char *p)
+{
+	/* Terminate the message with a newline like the other pr_info()s here. */
+	pr_info("stf-barrier: disabled on command line.\n");
+	no_stf_barrier = true;
+	return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+/*
+ * Handler for "spec_store_bypass_disable=", the generic flag used by other
+ * architectures.
+ *
+ * Returns 0 for a missing value, "auto", or a value starting with "on"
+ * (barrier left at its default) and for a value starting with "off"
+ * (barrier disabled via handle_no_stf_barrier()); returns 1 for anything
+ * else.
+ *
+ * NOTE(review): "auto" is compared including its terminating NUL (exact
+ * match) while "on" and "off" are prefix matches, so e.g. "only" is
+ * accepted as "on". Left as is; confirm whether exact matching is wanted.
+ */
+static int __init handle_ssbd(char *p)
+{
+	/* Until firmware tells us, we have the barrier with auto */
+	if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0)
+		return 0;
+
+	if (strncmp(p, "off", 3) == 0) {
+		handle_no_stf_barrier(NULL);
+		return 0;
+	}
+
+	/* Unrecognised value. */
+	return 1;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/*
+ * This is the generic flag used by other architectures.
+ *
+ * Handler for "nospec_store_bypass_disable": ignores its own argument and
+ * simply calls handle_no_stf_barrier(), i.e. it behaves like
+ * "no_stf_barrier". Always returns 0.
+ */
+static int __init handle_no_ssbd(char *p)
+{
+ handle_no_stf_barrier(NULL);
+ return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+/*
+ * Apply the store-forwarding barrier fixups for the requested state and
+ * record that state in stf_barrier. Enabling uses the barrier type stored
+ * in stf_enabled_flush_types; disabling applies STF_BARRIER_NONE.
+ */
+static void stf_barrier_enable(bool enable)
+{
+	do_stf_barrier_fixups(enable ? stf_enabled_flush_types
+				     : STF_BARRIER_NONE);
+
+	stf_barrier = enable;
+}
+
+/*
+ * Choose the store-forwarding barrier type from the CPU feature bits,
+ * record it in stf_enabled_flush_types, and, unless the barrier was
+ * disabled on the command line, enable or disable it according to the
+ * security feature flags.
+ */
+void setup_stf_barrier(void)
+{
+	enum stf_barrier_type type = STF_BARRIER_NONE;
+	bool enable = false;
+
+	/* Default to fallback in case fw-features are not available */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		type = STF_BARRIER_EIEIO;
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		type = STF_BARRIER_SYNC_ORI;
+	else if (cpu_has_feature(CPU_FTR_ARCH_206))
+		type = STF_BARRIER_FALLBACK;
+
+	/*
+	 * Enable only when security is favoured and an L1D flush is called
+	 * for: the PR flag always counts, the HV flag only in HV mode.
+	 */
+	if (security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY)) {
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+			enable = true;
+		else if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV))
+			enable = cpu_has_feature(CPU_FTR_HVMODE);
+	}
+
+	switch (type) {
+	case STF_BARRIER_FALLBACK:
+		pr_info("stf-barrier: fallback barrier available\n");
+		break;
+	case STF_BARRIER_SYNC_ORI:
+		pr_info("stf-barrier: hwsync barrier available\n");
+		break;
+	case STF_BARRIER_EIEIO:
+		pr_info("stf-barrier: eieio barrier available\n");
+		break;
+	default:
+		/* STF_BARRIER_NONE: nothing to announce. */
+		break;
+	}
+
+	stf_enabled_flush_types = type;
+
+	if (no_stf_barrier)
+		return;
+
+	stf_barrier_enable(enable);
+}
+
+/*
+ * Device attribute show callback for speculative store bypass.
+ *
+ * When the barrier is enabled with a type other than STF_BARRIER_NONE,
+ * the mitigation and its barrier type are reported. Otherwise the result
+ * is "Vulnerable" when either L1D flush feature flag is set, and
+ * "Not affected" when neither is. Returns the number of bytes written.
+ */
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	const char *type;
+
+	if (!stf_barrier || stf_enabled_flush_types == STF_BARRIER_NONE) {
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) ||
+		    security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+			return sprintf(buf, "Vulnerable\n");
+
+		return sprintf(buf, "Not affected\n");
+	}
+
+	/* Map the active barrier type to the name shown to the user. */
+	if (stf_enabled_flush_types == STF_BARRIER_EIEIO)
+		type = "eieio";
+	else if (stf_enabled_flush_types == STF_BARRIER_SYNC_ORI)
+		type = "hwsync";
+	else if (stf_enabled_flush_types == STF_BARRIER_FALLBACK)
+		type = "fallback";
+	else
+		type = "unknown";
+
+	return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type);
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs write handler: 1 enables the barrier, 0 disables it, any other
+ * value is rejected with -EINVAL. The fixups are only re-applied when the
+ * requested state differs from the current one.
+ */
+static int stf_barrier_set(void *data, u64 val)
+{
+	bool enable = (val == 1);
+
+	/* val is unsigned, so this rejects everything except 0 and 1. */
+	if (val > 1)
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable == stf_barrier)
+		return 0;
+
+	stf_barrier_enable(enable);
+
+	return 0;
+}
+
+/*
+ * debugfs read handler: report 1 in *val when the barrier is currently
+ * enabled and 0 when it is not. Always returns 0.
+ */
+static int stf_barrier_get(void *data, u64 *val)
+{
+	if (stf_barrier)
+		*val = 1;
+	else
+		*val = 0;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+
+/*
+ * Create the "stf_barrier" debugfs file (mode 0600) under
+ * powerpc_debugfs_root, backed by fops_stf_barrier. Always returns 0.
+ */
+static __init int stf_barrier_debugfs_init(void)
+{
+	debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root,
+			    NULL, &fops_stf_barrier);
+
+	return 0;
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index d73ec518ef80..0af5c11b9e78 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -437,6 +437,8 @@ static void __init cpu_init_thread_core_maps(int tpc)
}
+u32 *cpu_to_phys_id = NULL;
+
/**
* setup_cpu_maps - initialize the following cpu maps:
* cpu_possible_mask
@@ -463,6 +465,10 @@ void __init smp_setup_cpu_maps(void)
DBG("smp_setup_cpu_maps()\n");
+ cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
+ __alignof__(u32)));
+ memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+
for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
@@ -480,6 +486,7 @@ void __init smp_setup_cpu_maps(void)
intserv = of_get_property(dn, "reg", &len);
if (!intserv) {
cpu_be = cpu_to_be32(cpu);
+ /* XXX: what is this? uninitialized?? */
intserv = &cpu_be; /* assume logical == phys */
len = 4;
}
@@ -499,8 +506,8 @@ void __init smp_setup_cpu_maps(void)
"enable-method", "spin-table");
set_cpu_present(cpu, avail);
- set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
set_cpu_possible(cpu, true);
+ cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
cpu++;
}
@@ -835,6 +842,23 @@ static __init void print_system_info(void)
pr_info("-----------------------------------------------------\n");
}
+#ifdef CONFIG_SMP
+/*
+ * Allocate a paca for every possible CPU except the one we are running on,
+ * and set each such CPU's hard SMP processor id from the temporary
+ * cpu_to_phys_id[] table. The table is then freed back to memblock and
+ * the pointer cleared.
+ *
+ * Marked __init: it calls the __init function allocate_paca() and its only
+ * caller is the __init function setup_arch(), so leaving it in regular
+ * text would be a section mismatch.
+ */
+static void __init smp_setup_pacas(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		/* The CPU we are running on is not handled here. */
+		if (cpu == smp_processor_id())
+			continue;
+		allocate_paca(cpu);
+		set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
+	}
+
+	/* Release the temporary id table and mark it as gone. */
+	memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+	cpu_to_phys_id = NULL;
+}
+#endif
+
/*
* Called into from start_kernel this initializes memblock, which is used
* to manage page allocation until mem_init is called.
@@ -888,8 +912,8 @@ void __init setup_arch(char **cmdline_p)
/* Check the SMT related command line arguments (ppc64). */
check_smt_enabled();
- /* On BookE, setup per-core TLB data structures. */
- setup_tlb_core_data();
+ /* Parse memory topology */
+ mem_topology_setup();
/*
* Release secondary cpus out of their spinloops at 0x60 now that
@@ -899,6 +923,11 @@ void __init setup_arch(char **cmdline_p)
* so smp_release_cpus() does nothing for them.
*/
#ifdef CONFIG_SMP
+ smp_setup_pacas();
+
+ /* On BookE, setup per-core TLB data structures. */
+ setup_tlb_core_data();
+
smp_release_cpus();
#endif
@@ -919,6 +948,8 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC64
if (!radix_enabled())
init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#elif defined(CONFIG_PPC_8xx)
+ init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
#else
#error "context.addr_limit not initialized."
#endif
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index 3fc11e30308f..d144df54ad40 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -46,13 +46,10 @@ static inline void emergency_stack_init(void) { };
#endif
#ifdef CONFIG_PPC64
-void record_spr_defaults(void);
-#else
-static inline void record_spr_defaults(void) { };
-#endif
-
-#ifdef CONFIG_PPC64
u64 ppc64_bolted_size(void);
+
+/* Default SPR values from firmware/kexec */
+extern unsigned long spr_default_dscr;
#endif
/*
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 51ebc01fff52..74457485574b 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -39,6 +39,7 @@
#include <asm/udbg.h>
#include <asm/code-patching.h>
#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
#define DBG(fmt...)
@@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr)
}
/* Checks "l2cr=xxxx" command-line option */
-int __init ppc_setup_l2cr(char *str)
+static int __init ppc_setup_l2cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L2CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str)
__setup("l2cr=", ppc_setup_l2cr);
/* Checks "l3cr=xxxx" command-line option */
-int __init ppc_setup_l3cr(char *str)
+static int __init ppc_setup_l3cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L3CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync);
#endif /* CONFIG_NVRAM */
-int __init ppc_init(void)
+static int __init ppc_init(void)
{
/* clear the progress line */
if (ppc_md.progress)
@@ -192,7 +193,6 @@ int __init ppc_init(void)
}
return 0;
}
-
arch_initcall(ppc_init);
void __init irqstack_early_init(void)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index c388cc3357fa..b78f142a4148 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -110,7 +110,7 @@ void __init setup_tlb_core_data(void)
if (cpu_first_thread_sibling(boot_cpuid) == first)
first = boot_cpuid;
- paca[cpu].tcd_ptr = &paca[first].tcd;
+ paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
/*
* If we have threads, we need either tlbsrx.
@@ -254,6 +254,14 @@ static void cpu_ready_for_interrupts(void)
get_paca()->kernel_msr = MSR_KERNEL;
}
+unsigned long spr_default_dscr = 0;
+
+/*
+ * Save the current DSCR value in spr_default_dscr. On CPUs without
+ * CPU_FTR_DSCR the register is not read and the variable is left alone.
+ */
+void __init record_spr_defaults(void)
+{
+	if (!early_cpu_has_feature(CPU_FTR_DSCR))
+		return;
+
+	spr_default_dscr = mfspr(SPRN_DSCR);
+}
+
/*
* Early initialization entry point. This is called by head.S
* with MMU translation disabled. We rely on the "feature" of
@@ -304,7 +312,11 @@ void __init early_setup(unsigned long dt_ptr)
early_init_devtree(__va(dt_ptr));
/* Now we know the logical id of our boot cpu, setup the paca. */
- setup_paca(&paca[boot_cpuid]);
+ if (boot_cpuid != 0) {
+ /* Poison paca_ptrs[0] again if it's not the boot cpu */
+ memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
+ }
+ setup_paca(paca_ptrs[boot_cpuid]);
fixup_boot_paca();
/*
@@ -599,6 +611,21 @@ __init u64 ppc64_bolted_size(void)
#endif
}
+/*
+ * Allocate one THREAD_SIZE stack below @limit and return its kernel
+ * virtual address.
+ *
+ * The first attempt asks memblock for memory on the node that
+ * early_cpu_to_node() reports for @cpu; if that yields nothing, the
+ * allocation is retried without a node preference. Panics if both fail.
+ */
+static void *__init alloc_stack(unsigned long limit, int cpu)
+{
+	unsigned long pa;
+
+	/* Preferred: memory on the CPU's own node. */
+	pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
+				     early_cpu_to_node(cpu), MEMBLOCK_NONE);
+	if (pa)
+		return __va(pa);
+
+	/* Fallback: no node preference. */
+	pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+	if (!pa)
+		panic("cannot allocate stacks");
+
+	return __va(pa);
+}
+
void __init irqstack_early_init(void)
{
u64 limit = ppc64_bolted_size();
@@ -610,12 +637,8 @@ void __init irqstack_early_init(void)
* accessed in realmode.
*/
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
+ softirq_ctx[i] = alloc_stack(limit, i);
+ hardirq_ctx[i] = alloc_stack(limit, i);
}
}
@@ -623,20 +646,21 @@ void __init irqstack_early_init(void)
void __init exc_lvl_early_init(void)
{
unsigned int i;
- unsigned long sp;
for_each_possible_cpu(i) {
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- critirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+ void *sp;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- dbgirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ critirq_ctx[i] = sp;
+ paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].mc_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ dbgirq_ctx[i] = sp;
+ paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
+
+ sp = alloc_stack(ULONG_MAX, i);
+ mcheckirq_ctx[i] = sp;
+ paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
}
if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
@@ -690,23 +714,24 @@ void __init emergency_stack_init(void)
for_each_possible_cpu(i) {
struct thread_info *ti;
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
- ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+ ti = alloc_stack(limit, i);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
- paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
+ paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
}
@@ -762,7 +787,7 @@ void __init setup_per_cpu_areas(void)
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
- paca[cpu].data_offset = __per_cpu_offset[cpu];
+ paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
}
}
#endif
@@ -846,9 +871,6 @@ static void do_nothing(void *unused)
void rfi_flush_enable(bool enable)
{
- if (rfi_flush == enable)
- return;
-
if (enable) {
do_rfi_flush_fixups(enabled_flush_types);
on_each_cpu(do_nothing, NULL, 1);
@@ -858,12 +880,27 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
-static void init_fallback_flush(void)
+static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
int cpu;
+ /* Only allocate the fallback flush area once (at boot time). */
+ if (l1d_flush_fallback_area)
+ return;
+
l1d_size = ppc64_caches.l1d.size;
+
+ /*
+ * If there is no d-cache-size property in the device tree, l1d_size
+ * could be zero. That leads to the loop in the asm wrapping around to
+ * 2^64-1, and then walking off the end of the fallback area and
+ * eventually causing a page fault which is fatal. Just default to
+ * something vaguely sane.
+ */
+ if (!l1d_size)
+ l1d_size = (64 * 1024);
+
limit = min(ppc64_bolted_size(), ppc64_rma_size);
/*
@@ -875,23 +912,24 @@ static void init_fallback_flush(void)
memset(l1d_flush_fallback_area, 0, l1d_size * 2);
for_each_possible_cpu(cpu) {
- paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
- paca[cpu].l1d_flush_size = l1d_size;
+ struct paca_struct *paca = paca_ptrs[cpu];
+ paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+ paca->l1d_flush_size = l1d_size;
}
}
-void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
{
if (types & L1D_FLUSH_FALLBACK) {
- pr_info("rfi-flush: Using fallback displacement flush\n");
+ pr_info("rfi-flush: fallback displacement flush available\n");
init_fallback_flush();
}
if (types & L1D_FLUSH_ORI)
- pr_info("rfi-flush: Using ori type flush\n");
+ pr_info("rfi-flush: ori type flush available\n");
if (types & L1D_FLUSH_MTTRIG)
- pr_info("rfi-flush: Using mttrig type flush\n");
+ pr_info("rfi-flush: mttrig type flush available\n");
enabled_flush_types = types;
@@ -902,13 +940,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
+ bool enable;
+
if (val == 1)
- rfi_flush_enable(true);
+ enable = true;
else if (val == 0)
- rfi_flush_enable(false);
+ enable = false;
else
return -EINVAL;
+ /* Only do anything if we're changing state */
+ if (enable != rfi_flush)
+ rfi_flush_enable(enable);
+
return 0;
}
@@ -927,12 +971,4 @@ static __init int rfi_flush_debugfs_init(void)
}
device_initcall(rfi_flush_debugfs_init);
#endif
-
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
-{
- if (rfi_flush)
- return sprintf(buf, "Mitigation: RFI Flush\n");
-
- return sprintf(buf, "Vulnerable\n");
-}
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 7c59d88b9d86..a6467f843acf 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
#else /* CONFIG_PPC64 */
+extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs);
+extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+ struct pt_regs *regs);
+
static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
struct task_struct *tsk)
{
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index a46de0035214..492f03451877 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
struct ucontext __user *new_ctx,
int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
{
- unsigned char tmp;
+ unsigned char tmp __maybe_unused;
int ctx_has_vsx_region = 0;
#ifdef CONFIG_PPC64
@@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
{
struct sig_dbg_op op;
int i;
- unsigned char tmp;
+ unsigned char tmp __maybe_unused;
unsigned long new_msr = regs->msr;
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
unsigned long new_dbcr0 = current->thread.debug.dbcr0;
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index bbe7634b3a43..9ca7148b5881 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -123,8 +123,8 @@ int smp_generic_kick_cpu(int nr)
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- if (!paca[nr].cpu_start) {
- paca[nr].cpu_start = 1;
+ if (!paca_ptrs[nr]->cpu_start) {
+ paca_ptrs[nr]->cpu_start = 1;
smp_mb();
return 0;
}
@@ -565,20 +565,64 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
}
#endif
+#ifdef CONFIG_NMI_IPI
+static void nmi_stop_this_cpu(struct pt_regs *regs)
+{
+ /*
+ * This is a special case because it never returns, so the NMI IPI
+ * handling would never mark it as done, which makes any later
+ * smp_send_nmi_ipi() call spin forever. Mark it done now.
+ *
+ * IRQs are already hard disabled by the smp_handle_nmi_ipi.
+ */
+ nmi_ipi_lock();
+ nmi_ipi_busy_count--;
+ nmi_ipi_unlock();
+
+ /* Remove this CPU */
+ set_cpu_online(smp_processor_id(), false);
+
+ spin_begin();
+ while (1)
+ spin_cpu_relax();
+}
+
+void smp_send_stop(void)
+{
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
+}
+
+#else /* CONFIG_NMI_IPI */
+
static void stop_this_cpu(void *dummy)
{
/* Remove this CPU */
set_cpu_online(smp_processor_id(), false);
- local_irq_disable();
+ hard_irq_disable();
+ spin_begin();
while (1)
- ;
+ spin_cpu_relax();
}
void smp_send_stop(void)
{
+ static bool stopped = false;
+
+ /*
+ * Prevent waiting on csd lock from a previous smp_send_stop.
+ * This is racy, but in general callers try to do the right
+ * thing and only fire off one smp_send_stop (e.g., see
+ * kernel/panic.c)
+ */
+ if (stopped)
+ return;
+
+ stopped = true;
+
smp_call_function(stop_this_cpu, NULL, 0);
}
+#endif /* CONFIG_NMI_IPI */
struct thread_info *current_set[NR_CPUS];
@@ -657,7 +701,7 @@ void smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
- paca[boot_cpuid].__current = current;
+ paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
current_set[boot_cpuid] = task_thread_info(current);
@@ -748,8 +792,8 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
struct thread_info *ti = task_thread_info(idle);
#ifdef CONFIG_PPC64
- paca[cpu].__current = idle;
- paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->__current = idle;
+ paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
#endif
ti->cpu = cpu;
secondary_ti = current_set[cpu] = ti;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 04d0bbd7a1dd..755dc98a57ae 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -20,6 +20,7 @@
#include <asm/firmware.h>
#include "cacheinfo.h"
+#include "setup.h"
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -588,21 +589,18 @@ static DEVICE_ATTR(dscr_default, 0600,
static void sysfs_create_dscr_default(void)
{
- int err = 0;
- if (cpu_has_feature(CPU_FTR_DSCR))
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
-}
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ int err = 0;
+ int cpu;
-void __init record_spr_defaults(void)
-{
- int cpu;
+ dscr_default = spr_default_dscr;
+ for_each_possible_cpu(cpu)
+ paca_ptrs[cpu]->dscr_default = dscr_default;
- if (cpu_has_feature(CPU_FTR_DSCR)) {
- dscr_default = mfspr(SPRN_DSCR);
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- paca[cpu].dscr_default = dscr_default;
+ err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
}
}
+
#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a32823dcd9a4..360e71d455cc 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -266,6 +266,9 @@ void accumulate_stolen_time(void)
static inline u64 calculate_stolen_time(u64 stop_tb)
{
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return 0;
+
if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
return scan_dispatch_log(stop_tb);
@@ -1234,7 +1237,7 @@ void calibrate_delay(void)
static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
ppc_md.get_rtc_time(tm);
- return rtc_valid_tm(tm);
+ return 0;
}
static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1e48d157196a..0e17dcb48720 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -208,6 +208,12 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
}
raw_local_irq_restore(flags);
+ /*
+ * system_reset_exception handles debugger, crash dump, panic, for 0x100
+ */
+ if (TRAP(regs) == 0x100)
+ return;
+
crash_fadump(regs, "die oops");
if (kexec_should_crash(current))
@@ -272,8 +278,13 @@ void die(const char *str, struct pt_regs *regs, long err)
{
unsigned long flags;
- if (debugger(regs))
- return;
+ /*
+ * system_reset_exception handles debugger, crash dump, panic, for 0x100
+ */
+ if (TRAP(regs) != 0x100) {
+ if (debugger(regs))
+ return;
+ }
flags = oops_begin(regs);
if (__die(str, regs, err))
@@ -285,7 +296,6 @@ NOKPROBE_SYMBOL(die);
void user_single_step_siginfo(struct task_struct *tsk,
struct pt_regs *regs, siginfo_t *info)
{
- memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP;
info->si_code = TRAP_TRACE;
info->si_addr = (void __user *)regs->nip;
@@ -323,7 +333,7 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code,
*/
thread_pkey_regs_save(&current->thread);
- memset(&info, 0, sizeof(info));
+ clear_siginfo(&info);
info.si_signo = signr;
info.si_code = code;
info.si_addr = (void __user *) addr;
@@ -460,7 +470,7 @@ static inline int check_io_access(struct pt_regs *regs)
/* single-step stuff */
#define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC)
#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
-
+#define clear_br_trace(regs) do {} while(0)
#else
/* On non-4xx, the reason for the machine check or program
exception is in the MSR. */
@@ -473,6 +483,7 @@ static inline int check_io_access(struct pt_regs *regs)
#define single_stepping(regs) ((regs)->msr & MSR_SE)
#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
+#define clear_br_trace(regs) ((regs)->msr &= ~MSR_BE)
#endif
#if defined(CONFIG_E500)
@@ -958,7 +969,7 @@ void unknown_exception(struct pt_regs *regs)
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
- _exception(SIGTRAP, regs, TRAP_FIXME, 0);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
exception_exit(prev_state);
}
@@ -980,7 +991,7 @@ bail:
void RunModeException(struct pt_regs *regs)
{
- _exception(SIGTRAP, regs, TRAP_FIXME, 0);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
}
void single_step_exception(struct pt_regs *regs)
@@ -988,6 +999,7 @@ void single_step_exception(struct pt_regs *regs)
enum ctx_state prev_state = exception_enter();
clear_single_step(regs);
+ clear_br_trace(regs);
if (kprobe_post_handler(regs))
return;
@@ -1019,7 +1031,7 @@ static void emulate_single_step(struct pt_regs *regs)
static inline int __parse_fpscr(unsigned long fpscr)
{
- int ret = FPE_FIXME;
+ int ret = FPE_FLTUNK;
/* Invalid operation */
if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
@@ -1495,18 +1507,6 @@ bail:
exception_exit(prev_state);
}
-void slb_miss_bad_addr(struct pt_regs *regs)
-{
- enum ctx_state prev_state = exception_enter();
-
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
- else
- bad_page_fault(regs, regs->dar, SIGSEGV);
-
- exception_exit(prev_state);
-}
-
void StackOverflow(struct pt_regs *regs)
{
printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
@@ -1612,6 +1612,22 @@ void facility_unavailable_exception(struct pt_regs *regs)
value = mfspr(SPRN_FSCR);
status = value >> 56;
+ if ((hv || status >= 2) &&
+ (status < ARRAY_SIZE(facility_strings)) &&
+ facility_strings[status])
+ facility = facility_strings[status];
+
+ /* We should not have taken this interrupt in kernel */
+ if (!user_mode(regs)) {
+ pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
+ facility, status, regs->nip);
+ die("Unexpected facility unavailable exception", regs, SIGABRT);
+ }
+
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
if (status == FSCR_DSCR_LG) {
/*
* User is accessing the DSCR register using the problem
@@ -1678,25 +1694,11 @@ void facility_unavailable_exception(struct pt_regs *regs)
return;
}
- if ((hv || status >= 2) &&
- (status < ARRAY_SIZE(facility_strings)) &&
- facility_strings[status])
- facility = facility_strings[status];
-
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
-
pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
out:
- if (user_mode(regs)) {
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- return;
- }
-
- die("Unexpected facility unavailable exception", regs, SIGABRT);
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
}
#endif
@@ -1970,7 +1972,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
extern int do_spe_mathemu(struct pt_regs *regs);
unsigned long spefscr;
int fpexc_mode;
- int code = FPE_FIXME;
+ int code = FPE_FLTUNK;
int err;
flush_spe_to_thread(current);
@@ -2039,7 +2041,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
printk(KERN_ERR "unrecognized spe instruction "
"in %s at %lx\n", current->comm, regs->nip);
} else {
- _exception(SIGFPE, regs, FPE_FIXME, regs->nip);
+ _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
return;
}
}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 22b01a3962f0..b44ec104a5a1 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
+#ifdef CONFIG_PPC32
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_gettimeofday", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_gettime", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_getres", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_get_tbfreq", NULL
},
{
- CPU_FTR_USE_TB, 0,
+ CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_time", NULL
},
+#endif
};
/*
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index c8af90ff49f0..dd10e6f1d1b7 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -89,7 +89,7 @@ SECTIONS
*/
.text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
#ifdef CONFIG_LD_HEAD_STUB_CATCH
- *(.linker_stub_catch);
+ KEEP(*(.linker_stub_catch));
. = . ;
#endif
@@ -98,7 +98,7 @@ SECTIONS
ALIGN_FUNCTION();
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
- *(.text.hot .text .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+ *(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
@@ -134,6 +134,20 @@ SECTIONS
#ifdef CONFIG_PPC64
. = ALIGN(8);
+ __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_entry_barrier_fixup = .;
+ *(__stf_entry_barrier_fixup)
+ __stop___stf_entry_barrier_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_exit_barrier_fixup = .;
+ *(__stf_exit_barrier_fixup)
+ __stop___stf_exit_barrier_fixup = .;
+ }
+
+ . = ALIGN(8);
__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
__start___rfi_flush_fixup = .;
*(__rfi_flush_fixup)
@@ -170,10 +184,10 @@ SECTIONS
.init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
INIT_DATA
__vtop_table_begin = .;
- *(.vtop_fixup);
+ KEEP(*(.vtop_fixup));
__vtop_table_end = .;
__ptov_table_begin = .;
- *(.ptov_fixup);
+ KEEP(*(.ptov_fixup));
__ptov_table_end = .;
}
@@ -194,26 +208,26 @@ SECTIONS
. = ALIGN(8);
__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
__start___ftr_fixup = .;
- *(__ftr_fixup)
+ KEEP(*(__ftr_fixup))
__stop___ftr_fixup = .;
}
. = ALIGN(8);
__mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) {
__start___mmu_ftr_fixup = .;
- *(__mmu_ftr_fixup)
+ KEEP(*(__mmu_ftr_fixup))
__stop___mmu_ftr_fixup = .;
}
. = ALIGN(8);
__lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
__start___lwsync_fixup = .;
- *(__lwsync_fixup)
+ KEEP(*(__lwsync_fixup))
__stop___lwsync_fixup = .;
}
#ifdef CONFIG_PPC64
. = ALIGN(8);
__fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {
__start___fw_ftr_fixup = .;
- *(__fw_ftr_fixup)
+ KEEP(*(__fw_ftr_fixup))
__stop___fw_ftr_fixup = .;
}
#endif
@@ -226,7 +240,7 @@ SECTIONS
. = ALIGN(8);
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
__machine_desc_start = . ;
- *(.machine.desc)
+ KEEP(*(.machine.desc))
__machine_desc_end = . ;
}
#ifdef CONFIG_RELOCATABLE
@@ -274,7 +288,7 @@ SECTIONS
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
*(.data.rel*)
- *(.sdata)
+ *(SDATA_MAIN)
*(.sdata2)
*(.got.plt) *(.got)
*(.plt)
@@ -289,7 +303,7 @@ SECTIONS
.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
__start_opd = .;
- *(.opd)
+ KEEP(*(.opd))
__end_opd = .;
}