aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/Makefile18
-rw-r--r--arch/powerpc/kernel/align.c70
-rw-r--r--arch/powerpc/kernel/asm-offsets.c27
-rw-r--r--arch/powerpc/kernel/cpu_setup_44x.S1
-rw-r--r--arch/powerpc/kernel/cputable.c136
-rw-r--r--arch/powerpc/kernel/crash.c2
-rw-r--r--arch/powerpc/kernel/crash_dump.c7
-rw-r--r--arch/powerpc/kernel/dma_64.c45
-rw-r--r--arch/powerpc/kernel/entry_32.S291
-rw-r--r--arch/powerpc/kernel/entry_64.S82
-rw-r--r--arch/powerpc/kernel/fpu.S41
-rw-r--r--arch/powerpc/kernel/ftrace.c154
-rw-r--r--arch/powerpc/kernel/head_32.S6
-rw-r--r--arch/powerpc/kernel/head_40x.S24
-rw-r--r--arch/powerpc/kernel/head_44x.S295
-rw-r--r--arch/powerpc/kernel/head_64.S82
-rw-r--r--arch/powerpc/kernel/head_booke.h126
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S253
-rw-r--r--arch/powerpc/kernel/ibmebus.c16
-rw-r--r--arch/powerpc/kernel/idle.c2
-rw-r--r--arch/powerpc/kernel/idle_6xx.S2
-rw-r--r--arch/powerpc/kernel/idle_e500.S93
-rw-r--r--arch/powerpc/kernel/io.c3
-rw-r--r--arch/powerpc/kernel/iommu.c51
-rw-r--r--arch/powerpc/kernel/irq.c41
-rw-r--r--arch/powerpc/kernel/kgdb.c410
-rw-r--r--arch/powerpc/kernel/kprobes.c42
-rw-r--r--arch/powerpc/kernel/lparcfg.c392
-rw-r--r--arch/powerpc/kernel/machine_kexec.c2
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c6
-rw-r--r--arch/powerpc/kernel/misc.S5
-rw-r--r--arch/powerpc/kernel/misc_32.S2
-rw-r--r--arch/powerpc/kernel/misc_64.S33
-rw-r--r--arch/powerpc/kernel/module.c116
-rw-r--r--arch/powerpc/kernel/module_32.c72
-rw-r--r--arch/powerpc/kernel/module_64.c81
-rw-r--r--arch/powerpc/kernel/msi.c2
-rw-r--r--arch/powerpc/kernel/of_device.c48
-rw-r--r--arch/powerpc/kernel/pci-common.c1
-rw-r--r--arch/powerpc/kernel/ppc32.h1
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c8
-rw-r--r--arch/powerpc/kernel/process.c159
-rw-r--r--arch/powerpc/kernel/prom.c4
-rw-r--r--arch/powerpc/kernel/prom_init.c14
-rw-r--r--arch/powerpc/kernel/prom_parse.c44
-rw-r--r--arch/powerpc/kernel/ptrace.c186
-rw-r--r--arch/powerpc/kernel/ptrace32.c14
-rw-r--r--arch/powerpc/kernel/rtas-proc.c14
-rw-r--r--arch/powerpc/kernel/rtas.c8
-rw-r--r--arch/powerpc/kernel/rtas_flash.c6
-rw-r--r--arch/powerpc/kernel/rtas_pci.c4
-rw-r--r--arch/powerpc/kernel/setup-common.c2
-rw-r--r--arch/powerpc/kernel/setup_32.c55
-rw-r--r--arch/powerpc/kernel/setup_64.c2
-rw-r--r--arch/powerpc/kernel/signal.c18
-rw-r--r--arch/powerpc/kernel/signal.h10
-rw-r--r--arch/powerpc/kernel/signal_32.c184
-rw-r--r--arch/powerpc/kernel/signal_64.c101
-rw-r--r--arch/powerpc/kernel/smp.c238
-rw-r--r--arch/powerpc/kernel/softemu8xx.c4
-rw-r--r--arch/powerpc/kernel/stacktrace.c38
-rw-r--r--arch/powerpc/kernel/suspend.c1
-rw-r--r--arch/powerpc/kernel/syscalls.c3
-rw-r--r--arch/powerpc/kernel/sysfs.c18
-rw-r--r--arch/powerpc/kernel/tau_6xx.c4
-rw-r--r--arch/powerpc/kernel/time.c16
-rw-r--r--arch/powerpc/kernel/traps.c69
-rw-r--r--arch/powerpc/kernel/vdso.c10
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S5
-rw-r--r--arch/powerpc/kernel/vdso64/vdso64.lds.S11
-rw-r--r--arch/powerpc/kernel/vio.c1033
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S39
72 files changed, 4074 insertions, 1329 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2346d271fbfd..1a4094704b1f 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,6 +12,18 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+ifdef CONFIG_FTRACE
+# Do not trace early boot code
+CFLAGS_REMOVE_cputable.o = -pg
+CFLAGS_REMOVE_prom_init.o = -pg
+
+ifdef CONFIG_DYNAMIC_FTRACE
+# dynamic ftrace setup.
+CFLAGS_REMOVE_ftrace.o = -pg
+endif
+
+endif
+
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
@@ -38,12 +50,13 @@ obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_E500) += idle_e500.o
obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
obj-$(CONFIG_TAU) += tau_6xx.o
obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o \
swsusp_$(CONFIG_WORD_SIZE).o
obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
-obj-$(CONFIG_MODULES) += module_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_44x) += cpu_setup_44x.o
ifeq ($(CONFIG_PPC_MERGE),y)
@@ -61,6 +74,7 @@ obj-y += time.o prom.o traps.o setup-common.o \
misc_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
obj-$(CONFIG_PPC64) += dma_64.o iommu.o
+obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o
obj-$(CONFIG_MODULES) += ppc_ksyms.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
@@ -78,6 +92,8 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+
obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
ifneq ($(CONFIG_PPC_INDIRECT_IO),y)
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index e06f75daeba3..367129789cc0 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -48,6 +48,7 @@ struct aligninfo {
#define HARD 0x80 /* string, stwcx. */
#define E4 0x40 /* SPE endianness is word */
#define E8 0x80 /* SPE endianness is double word */
+#define SPLT 0x80 /* VSX SPLAT load */
/* DSISR bits reported for a DCBZ instruction: */
#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
@@ -363,10 +364,10 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
* Only POWER6 has these instructions, and it does true little-endian,
* so we don't need the address swizzling.
*/
-static int emulate_fp_pair(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int flags)
+static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
+ unsigned int flags)
{
- char *ptr = (char *) &current->thread.fpr[reg];
+ char *ptr = (char *) &current->thread.TS_FPR(reg);
int i, ret;
if (!(flags & F))
@@ -637,6 +638,36 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
#endif /* CONFIG_SPE */
+#ifdef CONFIG_VSX
+/*
+ * Emulate VSX load/store; returns 1 on success, -EFAULT on a faulting copy.
+ */
+static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
+		       unsigned int areg, struct pt_regs *regs,
+		       unsigned int flags, unsigned int length)
+{
+	char *ptr = (char *) &current->thread.TS_FPR(reg);
+	int ret = 0;	/* the plain-load path only ORs into ret, so start from 0 */
+
+	flush_vsx_to_thread(current);
+
+	if (flags & ST)		/* store: register image -> user memory */
+		ret = __copy_to_user(addr, ptr, length);
+	else {
+		if (flags & SPLT){	/* splat: same 'length' bytes copied twice, back to back */
+			ret = __copy_from_user(ptr, addr, length);
+			ptr += length;
+		}
+		ret |= __copy_from_user(ptr, addr, length);
+	}
+	if (flags & U)		/* update form: write DAR back to the address register */
+		regs->gpr[areg] = regs->dar;
+	if (ret)		/* any non-zero copy result is reported as a fault */
+		return -EFAULT;
+	return 1;
+}
+#endif
+
/*
* Called on alignment exception. Attempts to fixup
*
@@ -647,7 +678,7 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr, nb, flags;
+ unsigned int instr, nb, flags, instruction = 0;
unsigned int reg, areg;
unsigned int dsisr;
unsigned char __user *addr;
@@ -689,6 +720,7 @@ int fix_alignment(struct pt_regs *regs)
if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
instr = cpu_to_le32(instr);
dsisr = make_dsisr(instr);
+ instruction = instr;
}
/* extract the operation and registers from the dsisr */
@@ -728,6 +760,30 @@ int fix_alignment(struct pt_regs *regs)
/* DAR has the operand effective address */
addr = (unsigned char __user *)regs->dar;
+#ifdef CONFIG_VSX
+ if ((instruction & 0xfc00003e) == 0x7c000018) {
+ /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
+ reg |= (instruction & 0x1) << 5;
+ /* Simple inline decoder instead of a table */
+ if (instruction & 0x200)
+ nb = 16;
+ else if (instruction & 0x080)
+ nb = 8;
+ else
+ nb = 4;
+ flags = 0;
+ if (instruction & 0x100)
+ flags |= ST;
+ if (instruction & 0x040)
+ flags |= U;
+ /* splat load needs a special decoder */
+ if ((instruction & 0x400) == 0){
+ flags |= SPLT;
+ nb = 8;
+ }
+ return emulate_vsx(addr, reg, areg, regs, flags, nb);
+ }
+#endif
/* A size of 0 indicates an instruction we don't support, with
* the exception of DCBZ which is handled as a special case here
*/
@@ -759,7 +815,7 @@ int fix_alignment(struct pt_regs *regs)
/* Special case for 16-byte FP loads and stores */
if (nb == 16)
- return emulate_fp_pair(regs, addr, reg, flags);
+ return emulate_fp_pair(addr, reg, flags);
/* If we are loading, get the data from user space, else
* get it from register values
@@ -784,7 +840,7 @@ int fix_alignment(struct pt_regs *regs)
return -EFAULT;
}
} else if (flags & F) {
- data.dd = current->thread.fpr[reg];
+ data.dd = current->thread.TS_FPR(reg);
if (flags & S) {
/* Single-precision FP store requires conversion... */
#ifdef CONFIG_PPC_FPU
@@ -862,7 +918,7 @@ int fix_alignment(struct pt_regs *regs)
if (unlikely(ret))
return -EFAULT;
} else if (flags & F)
- current->thread.fpr[reg] = data.dd;
+ current->thread.TS_FPR(reg) = data.dd;
else
regs->gpr[reg] = data.ll;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ec9228d687b0..92768d3006f7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -52,6 +52,10 @@
#include <asm/iseries/alpaca.h>
#endif
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#include "head_booke.h"
+#endif
+
int main(void)
{
DEFINE(THREAD, offsetof(struct task_struct, thread));
@@ -74,6 +78,10 @@ int main(void)
DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr));
DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ DEFINE(THREAD_VSR0, offsetof(struct thread_struct, fpr));
+ DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
+#endif /* CONFIG_VSX */
#ifdef CONFIG_PPC64
DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
#else /* CONFIG_PPC64 */
@@ -242,6 +250,25 @@ int main(void)
DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
#endif /* CONFIG_PPC64 */
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
+ DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+ /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
+ DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+ DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
+ DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
+ DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
+ DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
+ DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
+ DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
+ DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
+ DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
+ DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
+ DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
+ DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
+ DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
+#endif
+
DEFINE(CLONE_VM, CLONE_VM);
DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
index e3623e3e3451..5465e8de0e61 100644
--- a/arch/powerpc/kernel/cpu_setup_44x.S
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -33,6 +33,7 @@ _GLOBAL(__setup_cpu_440grx)
mtlr r4
blr
_GLOBAL(__setup_cpu_460ex)
+_GLOBAL(__setup_cpu_460gt)
b __init_fpu_44x
_GLOBAL(__setup_cpu_440gx)
_GLOBAL(__setup_cpu_440spe)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index e44d5530f0a6..25c273c761d1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -23,6 +23,9 @@
struct cpu_spec* cur_cpu_spec = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
+/* The platform string corresponding to the real PVR */
+const char *powerpc_base_platform;
+
/* NOTE:
* Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
* the responsibility of the appropriate CPU save/restore functions to
@@ -37,6 +40,7 @@ extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
+extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
@@ -52,6 +56,8 @@ extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
extern void __restore_cpu_pa6t(void);
extern void __restore_cpu_ppc970(void);
+extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
+extern void __restore_cpu_power7(void);
#endif /* CONFIG_PPC64 */
/* This table only contains "desktop" CPUs, it need to be filled with embedded
@@ -67,7 +73,12 @@ extern void __restore_cpu_ppc970(void);
PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE)
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
PPC_FEATURE_TRUE_LE | \
PPC_FEATURE_HAS_ALTIVEC_COMP)
@@ -347,6 +358,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.icache_bsize = 128,
.dcache_bsize = 128,
.machine_check = machine_check_generic,
+ .oprofile_cpu_type = "ppc64/compat-power5+",
.platform = "power5+",
},
{ /* Power6 */
@@ -378,8 +390,41 @@ static struct cpu_spec __initdata cpu_specs[] = {
.icache_bsize = 128,
.dcache_bsize = 128,
.machine_check = machine_check_generic,
+ .oprofile_cpu_type = "ppc64/compat-power6",
.platform = "power6",
},
+ { /* 2.06-compliant processor, i.e. Power7 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000003,
+ .cpu_name = "POWER7 (architected)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_generic,
+ .oprofile_cpu_type = "ppc64/compat-power7",
+ .platform = "power7",
+ },
+ { /* Power7 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003f0000,
+ .cpu_name = "POWER7 (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .oprofile_cpu_type = "ppc64/power7",
+ .oprofile_type = PPC_OPROFILE_POWER4,
+ .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
+ .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
+ .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
+ POWER6_MMCRA_OTHER,
+ .platform = "power7",
+ },
{ /* Cell Broadband Engine */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00700000,
@@ -1410,6 +1455,16 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_440A,
.platform = "ppc440",
},
+ { /* 440 in Xilinx Virtex-5 FXT */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x7ff21910,
+ .cpu_name = "440 in Virtex-5 FXT",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .platform = "ppc440",
+ },
{ /* 460EX */
.pvr_mask = 0xffff0002,
.pvr_value = 0x13020002,
@@ -1427,9 +1482,10 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pvr_value = 0x13020000,
.cpu_name = "460GT",
.cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
.icache_bsize = 32,
.dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
.machine_check = machine_check_440A,
.platform = "ppc440",
},
@@ -1491,7 +1547,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pvr_mask = 0xffff0000,
.pvr_value = 0x80200000,
.cpu_name = "e500",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
.cpu_features = CPU_FTRS_E500,
.cpu_user_features = COMMON_USER_BOOKE |
PPC_FEATURE_HAS_SPE_COMP |
@@ -1508,7 +1563,6 @@ static struct cpu_spec __initdata cpu_specs[] = {
.pvr_mask = 0xffff0000,
.pvr_value = 0x80210000,
.cpu_name = "e500v2",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
.cpu_features = CPU_FTRS_E500_2,
.cpu_user_features = COMMON_USER_BOOKE |
PPC_FEATURE_HAS_SPE_COMP |
@@ -1522,6 +1576,20 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_e500,
.platform = "ppc8548",
},
+ { /* e500mc */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80230000,
+ .cpu_name = "e500mc",
+ .cpu_features = CPU_FTRS_E500MC,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */
+ .oprofile_type = PPC_OPROFILE_FSL_EMB,
+ .machine_check = machine_check_e500,
+ .platform = "ppce500mc",
+ },
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
@@ -1567,9 +1635,34 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
t->cpu_setup = s->cpu_setup;
t->cpu_restore = s->cpu_restore;
t->platform = s->platform;
+ /*
+ * If we have passed through this logic once
+ * before and have pulled the default case
+ * because the real PVR was not found inside
+ * cpu_specs[], then we are possibly running in
+ * compatibility mode. In that case, let the
+ * oprofiler know which set of compatibility
+ * counters to pull from by making sure the
+ * oprofile_cpu_type string is set to that of
+ * compatibility mode. If the oprofile_cpu_type
+ * already has a value, then we are possibly
+ * overriding a real PVR with a logical one, and,
+ * in that case, keep the current value for
+ * oprofile_cpu_type.
+ */
+ if (t->oprofile_cpu_type == NULL)
+ t->oprofile_cpu_type = s->oprofile_cpu_type;
} else
*t = *s;
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+
+ /*
+ * Set the base platform string once; assumes
+ * we're called with real pvr first.
+ */
+ if (*PTRRELOC(&powerpc_base_platform) == NULL)
+ *PTRRELOC(&powerpc_base_platform) = t->platform;
+
#if defined(CONFIG_PPC64) || defined(CONFIG_BOOKE)
/* ppc64 and booke expect identify_cpu to also call
* setup_cpu for that processor. I will consolidate
@@ -1587,38 +1680,3 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
BUG();
return NULL;
}
-
-void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
-{
- struct fixup_entry {
- unsigned long mask;
- unsigned long value;
- long start_off;
- long end_off;
- } *fcur, *fend;
-
- fcur = fixup_start;
- fend = fixup_end;
-
- for (; fcur < fend; fcur++) {
- unsigned int *pstart, *pend, *p;
-
- if ((value & fcur->mask) == fcur->value)
- continue;
-
- /* These PTRRELOCs will disappear once the new scheme for
- * modules and vdso is implemented
- */
- pstart = ((unsigned int *)fcur) + (fcur->start_off / 4);
- pend = ((unsigned int *)fcur) + (fcur->end_off / 4);
-
- for (p = pstart; p < pend; p++) {
- *p = 0x60000000u;
- asm volatile ("dcbst 0, %0" : : "r" (p));
- }
- asm volatile ("sync" : : : "memory");
- for (p = pstart; p < pend; p++)
- asm volatile ("icbi 0,%0" : : "r" (p));
- asm volatile ("sync; isync" : : : "memory");
- }
-}
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index eae401de3f76..0a8439aafdd1 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -48,7 +48,7 @@ int crashing_cpu = -1;
static cpumask_t cpus_in_crash = CPU_MASK_NONE;
cpumask_t cpus_in_sr = CPU_MASK_NONE;
-#define CRASH_HANDLER_MAX 1
+#define CRASH_HANDLER_MAX 2
/* NULL terminated list of shutdown handles */
static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
static DEFINE_SPINLOCK(crash_handlers_lock);
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 9ee3c5278db0..e0debcca0bfa 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -14,6 +14,7 @@
#include <linux/crash_dump.h>
#include <linux/bootmem.h>
#include <linux/lmb.h>
+#include <asm/code-patching.h>
#include <asm/kdump.h>
#include <asm/prom.h>
#include <asm/firmware.h>
@@ -33,6 +34,8 @@ void __init reserve_kdump_trampoline(void)
static void __init create_trampoline(unsigned long addr)
{
+ unsigned int *p = (unsigned int *)addr;
+
/* The maximum range of a single instruction branch, is the current
* instruction's address + (32 MB - 4) bytes. For the trampoline we
* need to branch to current address + 32 MB. So we insert a nop at
@@ -41,8 +44,8 @@ static void __init create_trampoline(unsigned long addr)
* branch to "addr" we jump to ("addr" + 32 MB). Although it requires
* two instructions it doesn't require any registers.
*/
- create_instruction(addr, 0x60000000); /* nop */
- create_branch(addr + 4, addr + PHYSICAL_START, 0);
+ patch_instruction(p, PPC_NOP_INSTR);
+ patch_branch(++p, addr + PHYSICAL_START, 0);
}
void __init setup_kdump_trampoline(void)
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 3a317cb0636a..ae5708e3a312 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -15,15 +15,6 @@
* Generic iommu implementation
*/
-static inline unsigned long device_to_mask(struct device *dev)
-{
- if (dev->dma_mask && *dev->dma_mask)
- return *dev->dma_mask;
- /* Assume devices without mask can take 32 bit addresses */
- return 0xfffffffful;
-}
-
-
/* Allocates a contiguous real buffer and creates mappings over it.
* Returns the virtual address of the buffer and sets dma_handle
* to the dma address (mapping) of the first page.
@@ -50,32 +41,38 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
*/
static dma_addr_t dma_iommu_map_single(struct device *dev, void *vaddr,
size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
return iommu_map_single(dev, dev->archdata.dma_data, vaddr, size,
- device_to_mask(dev), direction);
+ device_to_mask(dev), direction, attrs);
}
static void dma_iommu_unmap_single(struct device *dev, dma_addr_t dma_handle,
size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- iommu_unmap_single(dev->archdata.dma_data, dma_handle, size, direction);
+ iommu_unmap_single(dev->archdata.dma_data, dma_handle, size, direction,
+ attrs);
}
static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- return iommu_map_sg(dev, sglist, nelems,
- device_to_mask(dev), direction);
+ return iommu_map_sg(dev, dev->archdata.dma_data, sglist, nelems,
+ device_to_mask(dev), direction, attrs);
}
static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction);
+ iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction,
+ attrs);
}
/* We support DMA to/from any memory page via the iommu */
@@ -148,19 +145,22 @@ static void dma_direct_free_coherent(struct device *dev, size_t size,
static dma_addr_t dma_direct_map_single(struct device *dev, void *ptr,
size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
return virt_to_abs(ptr) + get_dma_direct_offset(dev);
}
static void dma_direct_unmap_single(struct device *dev, dma_addr_t dma_addr,
size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
}
static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -174,7 +174,8 @@ static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
}
static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c8614d9875c..81c8324a4a3c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -30,6 +30,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
+#include <asm/ftrace.h>
#undef SHOW_SYSCALLS
#undef SHOW_SYSCALLS_TASK
@@ -44,29 +45,54 @@
#endif
#ifdef CONFIG_BOOKE
-#include "head_booke.h"
-#define TRANSFER_TO_HANDLER_EXC_LEVEL(exc_level) \
- mtspr exc_level##_SPRG,r8; \
- BOOKE_LOAD_EXC_LEVEL_STACK(exc_level); \
- lwz r0,GPR10-INT_FRAME_SIZE(r8); \
- stw r0,GPR10(r11); \
- lwz r0,GPR11-INT_FRAME_SIZE(r8); \
- stw r0,GPR11(r11); \
- mfspr r8,exc_level##_SPRG
-
.globl mcheck_transfer_to_handler
mcheck_transfer_to_handler:
- TRANSFER_TO_HANDLER_EXC_LEVEL(MCHECK)
- b transfer_to_handler_full
+ mfspr r0,SPRN_DSRR0
+ stw r0,_DSRR0(r11)
+ mfspr r0,SPRN_DSRR1
+ stw r0,_DSRR1(r11)
+ /* fall through */
.globl debug_transfer_to_handler
debug_transfer_to_handler:
- TRANSFER_TO_HANDLER_EXC_LEVEL(DEBUG)
- b transfer_to_handler_full
+ mfspr r0,SPRN_CSRR0
+ stw r0,_CSRR0(r11)
+ mfspr r0,SPRN_CSRR1
+ stw r0,_CSRR1(r11)
+ /* fall through */
.globl crit_transfer_to_handler
crit_transfer_to_handler:
- TRANSFER_TO_HANDLER_EXC_LEVEL(CRIT)
+#ifdef CONFIG_FSL_BOOKE
+ mfspr r0,SPRN_MAS0
+ stw r0,MAS0(r11)
+ mfspr r0,SPRN_MAS1
+ stw r0,MAS1(r11)
+ mfspr r0,SPRN_MAS2
+ stw r0,MAS2(r11)
+ mfspr r0,SPRN_MAS3
+ stw r0,MAS3(r11)
+ mfspr r0,SPRN_MAS6
+ stw r0,MAS6(r11)
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r0,SPRN_MAS7
+ stw r0,MAS7(r11)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_FSL_BOOKE */
+#ifdef CONFIG_44x
+ mfspr r0,SPRN_MMUCR
+ stw r0,MMUCR(r11)
+#endif
+ mfspr r0,SPRN_SRR0
+ stw r0,_SRR0(r11)
+ mfspr r0,SPRN_SRR1
+ stw r0,_SRR1(r11)
+
+ mfspr r8,SPRN_SPRG3
+ lwz r0,KSP_LIMIT(r8)
+ stw r0,SAVED_KSP_LIMIT(r11)
+ rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -77,6 +103,16 @@ crit_transfer_to_handler:
stw r0,GPR10(r11)
lwz r0,crit_r11@l(0)
stw r0,GPR11(r11)
+ mfspr r0,SPRN_SRR0
+ stw r0,crit_srr0@l(0)
+ mfspr r0,SPRN_SRR1
+ stw r0,crit_srr1@l(0)
+
+ mfspr r8,SPRN_SPRG3
+ lwz r0,KSP_LIMIT(r8)
+ stw r0,saved_ksp_limit@l(0)
+ rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
+ stw r0,KSP_LIMIT(r8)
/* fall through */
#endif
@@ -112,7 +148,7 @@ transfer_to_handler:
/* Check to see if the dbcr0 register is set up to debug. Use the
internal debug mode bit to do this. */
lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IDM@h
+ andis. r12,r12,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
li r12,-1 /* clear all pending debug events */
@@ -141,13 +177,14 @@ transfer_to_handler:
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
5:
-#ifdef CONFIG_6xx
+#if defined(CONFIG_6xx) || defined(CONFIG_E500)
rlwinm r9,r1,0,0,31-THREAD_SHIFT
tophys(r9,r9) /* check local flags */
lwz r12,TI_LOCAL_FLAGS(r9)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
-#endif /* CONFIG_6xx */
+ bt- 31-TLF_SLEEPING,7f
+#endif /* CONFIG_6xx || CONFIG_E500 */
.globl transfer_to_handler_cont
transfer_to_handler_cont:
3:
@@ -160,10 +197,17 @@ transfer_to_handler_cont:
SYNC
RFI /* jump to handler, enable MMU */
-#ifdef CONFIG_6xx
+#if defined (CONFIG_6xx) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
stw r12,TI_LOCAL_FLAGS(r9)
- b power_save_6xx_restore
+ b power_save_ppc32_restore
+
+7: rlwinm r12,r12,0,~_TLF_SLEEPING
+ stw r12,TI_LOCAL_FLAGS(r9)
+ lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
+ rlwinm r9,r9,0,~MSR_EE
+ lwz r12,_LINK(r11) /* and return to address in LR */
+ b fast_exception_return
#endif
/*
@@ -248,7 +292,7 @@ syscall_exit_cont:
/* If the process has its own DBCR0 value, load it up. The internal
debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
+ andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
bnel- load_dbcr0
#endif
#ifdef CONFIG_44x
@@ -668,7 +712,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */
/* Check current_thread_info()->flags */
rlwinm r9,r1,0,0,(31-THREAD_SHIFT)
lwz r9,TI_FLAGS(r9)
- andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED)
+ andi. r0,r9,_TIF_USER_WORK_MASK
bne do_work
restore_user:
@@ -676,7 +720,7 @@ restore_user:
/* Check whether this process has its own DBCR0 value. The internal
debug mode bit tells us that dbcr0 should be loaded. */
lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
+ andis. r10,r0,(DBCR0_IDM | DBSR_DAC1R | DBSR_DAC1W)@h
bnel- load_dbcr0
#endif
@@ -859,17 +903,90 @@ exc_exit_restart_end:
exc_lvl_rfi; \
b .; /* prevent prefetch past exc_lvl_rfi */
+#define RESTORE_xSRR(exc_lvl_srr0, exc_lvl_srr1) \
+ lwz r9,_##exc_lvl_srr0(r1); \
+ lwz r10,_##exc_lvl_srr1(r1); \
+ mtspr SPRN_##exc_lvl_srr0,r9; \
+ mtspr SPRN_##exc_lvl_srr1,r10;
+
+#if defined(CONFIG_FSL_BOOKE)
+#ifdef CONFIG_PHYS_64BIT
+#define RESTORE_MAS7 \
+ lwz r11,MAS7(r1); \
+ mtspr SPRN_MAS7,r11;
+#else
+#define RESTORE_MAS7
+#endif /* CONFIG_PHYS_64BIT */
+#define RESTORE_MMU_REGS \
+ lwz r9,MAS0(r1); \
+ lwz r10,MAS1(r1); \
+ lwz r11,MAS2(r1); \
+ mtspr SPRN_MAS0,r9; \
+ lwz r9,MAS3(r1); \
+ mtspr SPRN_MAS1,r10; \
+ lwz r10,MAS6(r1); \
+ mtspr SPRN_MAS2,r11; \
+ mtspr SPRN_MAS3,r9; \
+ mtspr SPRN_MAS6,r10; \
+ RESTORE_MAS7;
+#elif defined(CONFIG_44x)
+#define RESTORE_MMU_REGS \
+ lwz r9,MMUCR(r1); \
+ mtspr SPRN_MMUCR,r9;
+#else
+#define RESTORE_MMU_REGS
+#endif
+
+#ifdef CONFIG_40x
.globl ret_from_crit_exc
ret_from_crit_exc:
+ mfspr r9,SPRN_SPRG3
+ lis r10,saved_ksp_limit@ha;
+ lwz r10,saved_ksp_limit@l(r10);
+ tovirt(r9,r9);
+ stw r10,KSP_LIMIT(r9)
+ lis r9,crit_srr0@ha;
+ lwz r9,crit_srr0@l(r9);
+ lis r10,crit_srr1@ha;
+ lwz r10,crit_srr1@l(r10);
+ mtspr SPRN_SRR0,r9;
+ mtspr SPRN_SRR1,r10;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI)
+#endif /* CONFIG_40x */
#ifdef CONFIG_BOOKE
+ .globl ret_from_crit_exc
+ret_from_crit_exc:
+ mfspr r9,SPRN_SPRG3
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_MMU_REGS;
+ RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, RFCI)
+
.globl ret_from_debug_exc
ret_from_debug_exc:
+ mfspr r9,SPRN_SPRG3
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ lwz r9,THREAD_INFO-THREAD(r9)
+ rlwinm r10,r1,0,0,(31-THREAD_SHIFT)
+ lwz r10,TI_PREEMPT(r10)
+ stw r10,TI_PREEMPT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_xSRR(CSRR0,CSRR1);
+ RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, RFDI)
.globl ret_from_mcheck_exc
ret_from_mcheck_exc:
+ mfspr r9,SPRN_SPRG3
+ lwz r10,SAVED_KSP_LIMIT(r1)
+ stw r10,KSP_LIMIT(r9)
+ RESTORE_xSRR(SRR0,SRR1);
+ RESTORE_xSRR(CSRR0,CSRR1);
+ RESTORE_xSRR(DSRR0,DSRR1);
+ RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, RFMCI)
#endif /* CONFIG_BOOKE */
@@ -925,7 +1042,7 @@ recheck:
lwz r9,TI_FLAGS(r9)
andi. r0,r9,_TIF_NEED_RESCHED
bne- do_resched
- andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK
+ andi. r0,r9,_TIF_USER_WORK_MASK
beq restore_user
do_user_signal: /* r10 contains MSR_KERNEL here */
ori r10,r10,MSR_EE
@@ -1035,3 +1152,129 @@ machine_check_in_rtas:
/* XXX load up BATs and panic */
#endif /* CONFIG_PPC_RTAS */
+
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ mflr r3
+ stw r7, 28(r1)
+ mfcr r5
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+ stw r3, 44(r1)
+ stw r5, 8(r1)
+ subi r3, r3, MCOUNT_INSN_SIZE
+ .globl mcount_call
+mcount_call:
+ bl ftrace_stub
+ nop
+ lwz r6, 8(r1)
+ lwz r0, 44(r1)
+ lwz r3, 12(r1)
+ mtctr r0
+ lwz r4, 16(r1)
+ mtcr r6
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r0, 52(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ mtlr r0
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+ addi r1, r1, 48
+ bctr
+
+_GLOBAL(ftrace_caller)
+ /* Based off of objdump output from glibc */
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ mflr r3
+ lwz r4, 52(r1)
+ mfcr r5
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+ stw r3, 44(r1)
+ stw r5, 8(r1)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+ lwz r6, 8(r1)
+ lwz r0, 44(r1)
+ lwz r3, 12(r1)
+ mtctr r0
+ lwz r4, 16(r1)
+ mtcr r6
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r0, 52(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ mtlr r0
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+ addi r1, r1, 48
+ bctr
+#else
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ stwu r1,-48(r1)
+ stw r3, 12(r1)
+ stw r4, 16(r1)
+ stw r5, 20(r1)
+ stw r6, 24(r1)
+ mflr r3
+ lwz r4, 52(r1)
+ mfcr r5
+ stw r7, 28(r1)
+ stw r8, 32(r1)
+ stw r9, 36(r1)
+ stw r10,40(r1)
+ stw r3, 44(r1)
+ stw r5, 8(r1)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5, ftrace_trace_function)
+ lwz r5,0(r5)
+
+ mtctr r5
+ bctrl
+
+ nop
+
+ lwz r6, 8(r1)
+ lwz r0, 44(r1)
+ lwz r3, 12(r1)
+ mtctr r0
+ lwz r4, 16(r1)
+ mtcr r6
+ lwz r5, 20(r1)
+ lwz r6, 24(r1)
+ lwz r0, 52(r1)
+ lwz r7, 28(r1)
+ lwz r8, 32(r1)
+ mtlr r0
+ lwz r9, 36(r1)
+ lwz r10,40(r1)
+ addi r1, r1, 48
+ bctr
+#endif
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#endif /* CONFIG_FTRACE */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c0db5b769e55..d7369243ae44 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -31,6 +31,7 @@
#include <asm/bug.h>
#include <asm/ptrace.h>
#include <asm/irqflags.h>
+#include <asm/ftrace.h>
/*
* System calls.
@@ -353,6 +354,11 @@ _GLOBAL(_switch)
mflr r20 /* Return to switch caller */
mfmsr r22
li r0, MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r0,r0,MSR_VSX@h /* Disable VSX */
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif /* CONFIG_VSX */
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
oris r0,r0,MSR_VEC@h /* Disable altivec */
@@ -383,16 +389,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r8,KSP(r4) /* new stack pointer */
BEGIN_FTR_SECTION
- b 2f
-END_FTR_SECTION_IFCLR(CPU_FTR_SLB)
-BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(95)
clrrdi r6,r8,28 /* get its ESID */
clrrdi r9,r1,28 /* get current sp ESID */
-END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
-BEGIN_FTR_SECTION
+ FTR_SECTION_ELSE_NESTED(95)
clrrdi r6,r8,40 /* get its 1T ESID */
clrrdi r9,r1,40 /* get current sp 1T ESID */
-END_FTR_SECTION_IFSET(CPU_FTR_1T_SEGMENT)
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_1T_SEGMENT, 95)
+FTR_SECTION_ELSE
+ b 2f
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_SLB)
clrldi. r0,r6,2 /* is new ESID c00000000? */
cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
cror eq,4*cr1+eq,eq
@@ -870,3 +876,67 @@ _GLOBAL(enter_prom)
ld r0,16(r1)
mtlr r0
blr
+
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ subi r3, r3, MCOUNT_INSN_SIZE
+ .globl mcount_call
+mcount_call:
+ bl ftrace_stub
+ nop
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+
+_GLOBAL(ftrace_caller)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ bl ftrace_stub
+ nop
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+#else
+_GLOBAL(mcount)
+ blr
+
+_GLOBAL(_mcount)
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ std r3, 128(r1)
+ ld r4, 16(r11)
+
+ subi r3, r3, MCOUNT_INSN_SIZE
+ LOAD_REG_ADDR(r5,ftrace_trace_function)
+ ld r5,0(r5)
+ ld r5,0(r5)
+ mtctr r5
+ bctrl
+
+ nop
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+_GLOBAL(ftrace_stub)
+ blr
+
+#endif
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 821e152e093c..a088c064ae40 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -24,6 +24,29 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#ifdef CONFIG_VSX
+#define REST_32FPVSRS(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ REST_32FPRS(n,base); \
+ b 3f; \
+2: REST_32VSRS(n,c,base); \
+3:
+
+#define SAVE_32FPVSRS(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ SAVE_32FPRS(n,base); \
+ b 3f; \
+2: SAVE_32VSRS(n,c,base); \
+3:
+#else
+#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
+#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
+#endif
+
/*
* This task wants to use the FPU now.
* On UP, disable FP for the task which had the FPU previously,
@@ -34,6 +57,11 @@
_GLOBAL(load_up_fpu)
mfmsr r5
ori r5,r5,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
SYNC
MTMSRD(r5) /* enable use of fpu now */
isync
@@ -50,7 +78,7 @@ _GLOBAL(load_up_fpu)
beq 1f
toreal(r4)
addi r4,r4,THREAD /* want last_task_used_math->thread */
- SAVE_32FPRS(0, r4)
+ SAVE_32FPVSRS(0, r5, r4)
mffs fr0
stfd fr0,THREAD_FPSCR(r4)
PPC_LL r5,PT_REGS(r4)
@@ -77,7 +105,7 @@ _GLOBAL(load_up_fpu)
#endif
lfd fr0,THREAD_FPSCR(r5)
MTFSF_L(fr0)
- REST_32FPRS(0, r5)
+ REST_32FPVSRS(0, r4, r5)
#ifndef CONFIG_SMP
subi r4,r5,THREAD
fromreal(r4)
@@ -85,7 +113,7 @@ _GLOBAL(load_up_fpu)
#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
- b fast_exception_return
+ blr
/*
* giveup_fpu(tsk)
@@ -96,6 +124,11 @@ _GLOBAL(load_up_fpu)
_GLOBAL(giveup_fpu)
mfmsr r5
ori r5,r5,MSR_FP
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ oris r5,r5,MSR_VSX@h
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
SYNC_601
ISYNC_601
MTMSRD(r5) /* enable use of fpu now */
@@ -106,7 +139,7 @@ _GLOBAL(giveup_fpu)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r5,0
- SAVE_32FPRS(0, r3)
+ SAVE_32FPVSRS(0, r4 ,r3)
mffs fr0
stfd fr0,THREAD_FPSCR(r3)
beq 1f
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
new file mode 100644
index 000000000000..3855ceb937b0
--- /dev/null
+++ b/arch/powerpc/kernel/ftrace.c
@@ -0,0 +1,154 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ftrace.h>
+
+
+static unsigned int ftrace_nop = 0x60000000;
+
+#ifdef CONFIG_PPC32
+# define GET_ADDR(addr) addr
+#else
+/* PowerPC64's functions are data that points to the functions */
+# define GET_ADDR(addr) *(unsigned long *)addr
+#endif
+
+
+/*
+ * Return the byte distance from the instruction at @ip to the target @addr,
+ * truncated to 32 bits. ftrace_call_replace() masks the result down to the
+ * branch offset field.
+ */
+static unsigned int notrace ftrace_calc_offset(long ip, long addr)
+{
+ return (int)(addr - ip);
+}
+
+/*
+ * Return a pointer to the instruction word stored in ftrace_nop
+ * (0x60000000), viewed as raw bytes.
+ */
+notrace unsigned char *ftrace_nop_replace(void)
+{
+ /* Cast to the declared return type; (char *) differs in signedness. */
+ return (unsigned char *)&ftrace_nop;
+}
+
+/*
+ * Build the "bl" instruction word that branches from @ip to @addr and
+ * return a pointer to it.
+ *
+ * The word lives in a single static buffer, so the returned pointer is only
+ * valid until the next call. When CONFIG_PPC32 is not set, @addr is first
+ * dereferenced via GET_ADDR(). The offset is masked to the 0x03fffffc field
+ * without a range check.
+ * NOTE(review): a target outside the branch's reach would be silently
+ * truncated; confirm callers guarantee the target is reachable.
+ */
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ static unsigned int op;
+
+ /*
+ * It would be nice to just use create_function_call, but that will
+ * update the code itself. Here we need to just return the
+ * instruction that is going to be modified, without modifying the
+ * code.
+ */
+ addr = GET_ADDR(addr);
+
+ /* Set to "bl addr" */
+ op = 0x48000001 | (ftrace_calc_offset(ip, addr) & 0x03fffffc);
+
+ /*
+ * No locking needed, this must be called via kstop_machine
+ * which in essence is like running on a uniprocessor machine.
+ */
+ return (unsigned char *)&op;
+}
+
+#ifdef CONFIG_PPC64
+# define _ASM_ALIGN " .align 3 "
+# define _ASM_PTR " .llong "
+#else
+# define _ASM_ALIGN " .align 2 "
+# define _ASM_PTR " .long "
+#endif
+
+/*
+ * Replace the 32-bit instruction at @ip with the word at @new_code, but only
+ * if the instruction currently there equals the word at @old_code.
+ *
+ * Returns 0 on success (or if @ip already holds the new instruction), 1 if
+ * reading @ip faulted, and 2 if @ip held some other instruction.
+ */
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned replaced = 0;
+ unsigned old = *(unsigned *)old_code;
+ unsigned new = *(unsigned *)new_code;
+ int faulted = 0;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine.
+ *
+ * Operand notes: ip uses the "b" constraint because r0 as a base
+ * register reads as the constant 0; replaced is early-clobber because
+ * it is written before new and old are read; cmpw sets cr0. A plain
+ * stw is used since the base register must not be updated.
+ */
+ asm volatile (
+ "1: lwz %1, 0(%2)\n"
+ " cmpw %1, %5\n"
+ " bne 2f\n"
+ " stw %3, 0(%2)\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ "3: li %0, 1\n"
+ " b 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ _ASM_ALIGN "\n"
+ _ASM_PTR "1b, 3b\n"
+ ".previous"
+ : "=r"(faulted), "=&r"(replaced)
+ : "b"(ip), "r"(new),
+ "0"(faulted), "r"(old)
+ : "memory", "cr0");
+
+ /* After a fault nothing was read, so only compare on a clean load. */
+ if (!faulted && replaced != old && replaced != new)
+ faulted = 2;
+
+ if (!faulted)
+ flush_icache_range(ip, ip + 8);
+
+ return faulted;
+}
+
+/*
+ * Re-point the branch at the ftrace_call site so that it calls @func.
+ *
+ * Returns whatever ftrace_modify_code() reports (0 on success).
+ */
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned char current_insn[MCOUNT_INSN_SIZE];
+ unsigned long site = (unsigned long)(&ftrace_call);
+ unsigned char *wanted;
+
+ /* Snapshot what is at the call site now; it is the expected old code. */
+ memcpy(current_insn, &ftrace_call, MCOUNT_INSN_SIZE);
+ wanted = ftrace_call_replace(site, (unsigned long)func);
+
+ return ftrace_modify_code(site, current_insn, wanted);
+}
+
+/*
+ * Patch the mcount_call site to branch to the function whose address is
+ * stored in *@data.
+ *
+ * The result of ftrace_modify_code() is written back through @data; the
+ * function itself always returns 0.
+ */
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+ unsigned long ip = (long)(&mcount_call);
+ unsigned long *addr = data;
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ /*
+ * Replace the mcount stub with a pointer to the
+ * ip recorder function.
+ */
+ memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, *addr);
+ /* Report the patching status to the caller through @data. */
+ *addr = ftrace_modify_code(ip, old, new);
+
+ return 0;
+}
+
+/*
+ * Forward @data to ftrace_mcount_set() and return 0 unconditionally; any
+ * patching status is written through @data by that callee.
+ */
+int __init ftrace_dyn_arch_init(void *data)
+{
+ /* This is running in kstop_machine */
+
+ ftrace_mcount_set(data);
+
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 785af9b56591..99ee2f0f0f2b 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -421,8 +421,10 @@ BEGIN_FTR_SECTION
b ProgramCheck
END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
EXCEPTION_PROLOG
- bne load_up_fpu /* if from user, just load it up */
- addi r3,r1,STACK_FRAME_OVERHEAD
+ beq 1f
+ bl load_up_fpu /* if from user, just load it up */
+ b fast_exception_return
+1: addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
/* Decrementer */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 8552e67e3a8b..56d8e5d90c5b 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -93,6 +93,12 @@ _ENTRY(crit_r10)
.space 4
_ENTRY(crit_r11)
.space 4
+_ENTRY(crit_srr0)
+ .space 4
+_ENTRY(crit_srr1)
+ .space 4
+_ENTRY(saved_ksp_limit)
+ .space 4
/*
* Exception vector entry code. This code runs with address translation
@@ -148,14 +154,14 @@ _ENTRY(crit_r11)
mfcr r10; /* save CR in r10 for now */\
mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
andi. r11,r11,MSR_PR; \
- lis r11,critical_stack_top@h; \
- ori r11,r11,critical_stack_top@l; \
+ lis r11,critirq_ctx@ha; \
+ tophys(r11,r11); \
+ lwz r11,critirq_ctx@l(r11); \
beq 1f; \
/* COMING FROM USER MODE */ \
mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,THREAD_SIZE; \
-1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
+1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
tophys(r11,r11); \
stw r10,_CCR(r11); /* save various registers */\
stw r12,GPR12(r11); \
@@ -996,16 +1002,6 @@ empty_zero_page:
swapper_pg_dir:
.space PGD_TABLE_SIZE
-
-/* Stack for handling critical exceptions from kernel mode */
- .section .bss
- .align 12
-exception_stack_bottom:
- .space 4096
-critical_stack_top:
- .globl exception_stack_top
-exception_stack_top:
-
/* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
*/
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 22b5d2c459a3..f3a1ea9d7fe4 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -293,119 +293,9 @@ interrupt_base:
MCHECK_EXCEPTION(0x0210, MachineCheckA, machine_check_exception)
/* Data Storage Interrupt */
- START_EXCEPTION(DataStorage)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
- mfcr r11
- mtspr SPRN_SPRG7W, r11
-
- /*
- * Check if it was a store fault, if not then bail
- * because a user tried to access a kernel or
- * read-protected page. Otherwise, get the
- * offending address and handle it.
- */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_ST@h
- beq 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
-
- mfspr r12,SPRN_MMUCR
- rlwinm r12,r12,0,0,23 /* Clear TID */
-
- b 4f
-
- /* Get the PGD for the current thread */
-3:
- mfspr r11,SPRN_SPRG3
- lwz r11,PGDIR(r11)
-
- /* Load PID into MMUCR TID */
- mfspr r12,SPRN_MMUCR /* Get MMUCR */
- mfspr r13,SPRN_PID /* Get PID */
- rlwimi r12,r13,0,24,31 /* Set TID */
-
-4:
- mtspr SPRN_MMUCR,r12
-
- rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */
- lwzx r11, r12, r11 /* Get pgd/pmd entry */
- rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 23, 20, 28 /* Compute pte address */
- lwz r11, 4(r12) /* Get pte entry */
-
- andi. r13, r11, _PAGE_RW /* Is it writeable? */
- beq 2f /* Bail if not */
-
- /* Update 'changed'.
- */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, 4(r12) /* Update Linux page table */
-
- li r13, PPC44x_TLB_SR@l /* Set SR */
- rlwimi r13, r11, 29, 29, 29 /* SX = _PAGE_HWEXEC */
- rlwimi r13, r11, 0, 30, 30 /* SW = _PAGE_RW */
- rlwimi r13, r11, 29, 28, 28 /* UR = _PAGE_USER */
- rlwimi r12, r11, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */
- rlwimi r12, r11, 29, 30, 30 /* (_PAGE_USER>>3)->r12 */
- and r12, r12, r11 /* HWEXEC/RW & USER */
- rlwimi r13, r12, 0, 26, 26 /* UX = HWEXEC & USER */
- rlwimi r13, r12, 3, 27, 27 /* UW = RW & USER */
-
- rlwimi r11,r13,0,26,31 /* Insert static perms */
-
- /*
- * Clear U0-U3 and WL1 IL1I IL1D IL2I IL2D bits which are added
- * on newer 440 cores like the 440x6 used on AMCC 460EX/460GT (see
- * include/asm-powerpc/pgtable-ppc32.h for details).
- */
- rlwinm r11,r11,0,20,10
-
- /* find the TLB index that caused the fault. It has to be here. */
- tlbsx r10, 0, r10
-
- tlbwe r11, r10, PPC44x_TLB_ATTRIB /* Write ATTRIB */
-
- /* Done...restore registers and get out of here.
- */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
+ DATA_STORAGE_EXCEPTION
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- rfi /* Force context change */
-
-2:
- /*
- * The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
-
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b data_access
-
- /* Instruction Storage Interrupt */
+ /* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
@@ -423,7 +313,6 @@ interrupt_base:
#else
EXCEPTION(0x2010, FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
#endif
-
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
NORMAL_EXCEPTION_PROLOG
@@ -484,18 +373,57 @@ interrupt_base:
4:
mtspr SPRN_MMUCR,r12
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ rlwimi r13,r12,10,30,30
+
+ /* Load the PTE */
rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */
lwzx r11, r12, r11 /* Get pgd/pmd entry */
rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
beq 2f /* Bail if no table */
rlwimi r12, r10, 23, 20, 28 /* Compute pte address */
- lwz r11, 4(r12) /* Get pte entry */
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ lwz r11, 0(r12) /* Get high word of pte entry */
+ lwz r12, 4(r12) /* Get low word of pte entry */
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 4(r12)
+ lis r10,tlb_44x_index@ha
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Load the next available TLB index */
+ lwz r13,tlb_44x_index@l(r10)
+
+ bne 2f /* Bail if permission mismatch */
+
+ /* Increment, rollover, and store TLB index */
+ addi r13,r13,1
+
+ /* Compare with watermark (instruction gets patched) */
+ .globl tlb_44x_patch_hwater_D
+tlb_44x_patch_hwater_D:
+ cmpwi 0,r13,1 /* reserve entries */
+ ble 5f
+ li r13,0
+5:
+ /* Store the next available TLB index */
+ stw r13,tlb_44x_index@l(r10)
+
+ /* Re-load the faulting address */
+ mfspr r10,SPRN_DEAR
/* Jump to common tlb load */
b finish_tlb_load
@@ -510,7 +438,7 @@ interrupt_base:
mfspr r12, SPRN_SPRG4R
mfspr r11, SPRN_SPRG1
mfspr r10, SPRN_SPRG0
- b data_access
+ b DataStorage
/* Instruction TLB Error Interrupt */
/*
@@ -554,18 +482,42 @@ interrupt_base:
4:
mtspr SPRN_MMUCR,r12
+ /* Make up the required permissions */
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC
+
rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */
lwzx r11, r12, r11 /* Get pgd/pmd entry */
rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */
beq 2f /* Bail if no table */
rlwimi r12, r10, 23, 20, 28 /* Compute pte address */
- lwz r11, 4(r12) /* Get pte entry */
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ lwz r11, 0(r12) /* Get high word of pte entry */
+ lwz r12, 4(r12) /* Get low word of pte entry */
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 4(r12)
+ lis r10,tlb_44x_index@ha
+
+ andc. r13,r13,r12 /* Check permission */
+
+ /* Load the next available TLB index */
+ lwz r13,tlb_44x_index@l(r10)
+
+ bne 2f /* Bail if permission mismatch */
+
+ /* Increment, rollover, and store TLB index */
+ addi r13,r13,1
+
+ /* Compare with watermark (instruction gets patched) */
+ .globl tlb_44x_patch_hwater_I
+tlb_44x_patch_hwater_I:
+ cmpwi 0,r13,1 /* reserve entries */
+ ble 5f
+ li r13,0
+5:
+ /* Store the next available TLB index */
+ stw r13,tlb_44x_index@l(r10)
+
+ /* Re-load the faulting address */
+ mfspr r10,SPRN_SRR0
/* Jump to common TLB load point */
b finish_tlb_load
@@ -587,86 +539,40 @@ interrupt_base:
/*
* Local functions
- */
- /*
- * Data TLB exceptions will bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-data_access:
- NORMAL_EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- EXC_XFER_EE_LITE(0x0300, handle_page_fault)
+ */
/*
* Both the instruction and data TLB miss get to this
* point to load the TLB.
* r10 - EA of fault
- * r11 - available to use
- * r12 - Pointer to the 64-bit PTE
- * r13 - available to use
+ * r11 - PTE high word value
+ * r12 - PTE low word value
+ * r13 - TLB index
* MMUCR - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
*/
finish_tlb_load:
- /*
- * We set execute, because we don't have the granularity to
- * properly set this at the page level (Linux problem).
- * If shared is set, we cause a zero PID->TID load.
- * Many of these bits are software only. Bits we don't set
- * here we (properly should) assume have the appropriate value.
- */
-
- /* Load the next available TLB index */
- lis r13, tlb_44x_index@ha
- lwz r13, tlb_44x_index@l(r13)
- /* Load the TLB high watermark */
- lis r11, tlb_44x_hwater@ha
- lwz r11, tlb_44x_hwater@l(r11)
-
- /* Increment, rollover, and store TLB index */
- addi r13, r13, 1
- cmpw 0, r13, r11 /* reserve entries */
- ble 7f
- li r13, 0
-7:
- /* Store the next available TLB index */
- lis r11, tlb_44x_index@ha
- stw r13, tlb_44x_index@l(r11)
-
- lwz r11, 0(r12) /* Get MS word of PTE */
- lwz r12, 4(r12) /* Get LS word of PTE */
- rlwimi r11, r12, 0, 0 , 19 /* Insert RPN */
- tlbwe r11, r13, PPC44x_TLB_XLAT /* Write XLAT */
+ /* Combine RPN & ERPN and write WS 0 */
+ rlwimi r11,r12,0,0,19
+ tlbwe r11,r13,PPC44x_TLB_XLAT
/*
- * Create PAGEID. This is the faulting address,
+ * Create WS1. This is the faulting address (EPN),
* page size, and valid flag.
*/
- li r11, PPC44x_TLB_VALID | PPC44x_TLB_4K
- rlwimi r10, r11, 0, 20, 31 /* Insert valid and page size */
- tlbwe r10, r13, PPC44x_TLB_PAGEID /* Write PAGEID */
-
- li r10, PPC44x_TLB_SR@l /* Set SR */
- rlwimi r10, r12, 0, 30, 30 /* Set SW = _PAGE_RW */
- rlwimi r10, r12, 29, 29, 29 /* SX = _PAGE_HWEXEC */
- rlwimi r10, r12, 29, 28, 28 /* UR = _PAGE_USER */
- rlwimi r11, r12, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */
- and r11, r12, r11 /* HWEXEC & USER */
- rlwimi r10, r11, 0, 26, 26 /* UX = HWEXEC & USER */
-
- rlwimi r12, r10, 0, 26, 31 /* Insert static perms */
-
- /*
- * Clear U0-U3 and WL1 IL1I IL1D IL2I IL2D bits which are added
- * on newer 440 cores like the 440x6 used on AMCC 460EX/460GT (see
- * include/asm-powerpc/pgtable-ppc32.h for details).
- */
- rlwinm r12, r12, 0, 20, 10
-
- tlbwe r12, r13, PPC44x_TLB_ATTRIB /* Write ATTRIB */
+ li r11,PPC44x_TLB_VALID | PPC44x_TLB_4K
+ rlwimi r10,r11,0,20,31 /* Insert valid and page size*/
+ tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */
+
+ /* And WS 2 */
+ li r10,0xf85 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ and r11,r12,r10 /* Mask PTE bits to keep */
+ andi. r10,r12,_PAGE_USER /* User page ? */
+ beq 1f /* nope, leave U bits empty */
+ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
*/
@@ -742,15 +648,6 @@ empty_zero_page:
swapper_pg_dir:
.space PGD_TABLE_SIZE
-/* Reserved 4k for the critical exception stack & 4k for the machine
- * check stack per CPU for kernel mode exceptions */
- .section .bss
- .align 12
-exception_stack_bottom:
- .space BOOKE_EXCEPTION_STACK_SIZE
- .globl exception_stack_top
-exception_stack_top:
-
/*
* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 25e84c0e1166..cc8fb474d520 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -275,7 +275,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
. = 0xf00
b performance_monitor_pSeries
- STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable)
+ . = 0xf20
+ b altivec_unavailable_pSeries
+
+ . = 0xf40
+ b vsx_unavailable_pSeries
#ifdef CONFIG_CBE_RAS
HSTD_EXCEPTION_PSERIES(0x1200, cbe_system_error)
@@ -295,6 +299,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
/* moved from 0xf00 */
STD_EXCEPTION_PSERIES(., performance_monitor)
+ STD_EXCEPTION_PSERIES(., altivec_unavailable)
+ STD_EXCEPTION_PSERIES(., vsx_unavailable)
/*
* An interrupt came in while soft-disabled; clear EE in SRR1,
@@ -739,7 +745,8 @@ fp_unavailable_common:
ENABLE_INTS
bl .kernel_fp_unavailable_exception
BUG_OPCODE
-1: b .load_up_fpu
+1: bl .load_up_fpu
+ b fast_exception_return
.align 7
.globl altivec_unavailable_common
@@ -747,7 +754,10 @@ altivec_unavailable_common:
EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
- bne .load_up_altivec /* if from user, just load it up */
+ beq 1f
+ bl .load_up_altivec
+ b fast_exception_return
+1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
bl .save_nvgprs
@@ -827,9 +837,70 @@ _STATIC(load_up_altivec)
std r4,0(r3)
#endif /* CONFIG_SMP */
/* restore registers and return */
- b fast_exception_return
+ blr
#endif /* CONFIG_ALTIVEC */
+ .align 7
+ .globl vsx_unavailable_common
+vsx_unavailable_common:
+ EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
+#ifdef CONFIG_VSX
+BEGIN_FTR_SECTION
+ bne .load_up_vsx
+1:
+END_FTR_SECTION_IFSET(CPU_FTR_VSX)
+#endif
+ bl .save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ ENABLE_INTS
+ bl .vsx_unavailable_exception
+ b .ret_from_except
+
+#ifdef CONFIG_VSX
+/*
+ * load_up_vsx(unused, unused, tsk)
+ * Disable VSX for the task which had it previously,
+ * and save its vector registers in its thread_struct.
+ * Reuse the fp and vsx saves, but first check to see if they have
+ * been saved already.
+ * On entry: r13 == 'current' && last_task_used_vsx != 'current'
+ */
+_STATIC(load_up_vsx)
+/* Load FP and VMX (AltiVec) registers if they haven't been loaded yet */
+ andi. r5,r12,MSR_FP
+ beql+ load_up_fpu /* skip if already loaded */
+ andis. r5,r12,MSR_VEC@h
+ beql+ load_up_altivec /* skip if already loaded */
+
+#ifndef CONFIG_SMP
+ ld r3,last_task_used_vsx@got(r2)
+ ld r4,0(r3)
+ cmpdi 0,r4,0
+ beq 1f
+ /* Disable VSX for last_task_used_vsx */
+ addi r4,r4,THREAD
+ ld r5,PT_REGS(r4)
+ ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lis r6,MSR_VSX@h
+ andc r6,r4,r6
+ std r6,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#endif /* CONFIG_SMP */
+ ld r4,PACACURRENT(r13)
+ addi r4,r4,THREAD /* Get THREAD */
+ li r6,1
+ stw r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
+ /* enable use of VSX after return */
+ oris r12,r12,MSR_VSX@h
+ std r12,_MSR(r1)
+#ifndef CONFIG_SMP
+ /* Update last_task_used_vsx to 'current' */
+ ld r4,PACACURRENT(r13)
+ std r4,0(r3)
+#endif /* CONFIG_SMP */
+ b fast_exception_return
+#endif /* CONFIG_VSX */
+
/*
* Hash table stuff
*/
@@ -1127,7 +1198,6 @@ _GLOBAL(generic_secondary_smp_init)
3: HMT_LOW
lbz r23,PACAPROCSTART(r13) /* Test if this processor should */
/* start. */
- sync
#ifndef CONFIG_SMP
b 3b /* Never go on non-SMP */
@@ -1135,6 +1205,8 @@ _GLOBAL(generic_secondary_smp_init)
cmpwi 0,r23,0
beq 3b /* Loop until told to go */
+ sync /* order paca.run and cur_cpu_spec */
+
/* See if we need to call a cpu state restore handler */
LOAD_REG_IMMEDIATE(r23, cur_cpu_spec)
ld r23,0(r23)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index aefafc6330c9..fce2df988504 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -43,9 +43,7 @@
SAVE_2GPRS(7, r11)
/* To handle the additional exception priority levels on 40x and Book-E
- * processors we allocate a 4k stack per additional priority level. The various
- * head_xxx.S files allocate space (exception_stack_top) for each priority's
- * stack times the number of CPUs
+ * processors we allocate a stack per additional priority level.
*
* On 40x critical is the only additional level
* On 44x/e500 we have critical and machine check
@@ -61,36 +59,37 @@
* going to critical or their own debug level we aren't currently
* providing configurations that micro-optimize space usage.
*/
-#ifdef CONFIG_44x
-#define NUM_EXCEPTION_LVLS 2
-#else
-#define NUM_EXCEPTION_LVLS 3
-#endif
-#define BOOKE_EXCEPTION_STACK_SIZE (4096 * NUM_EXCEPTION_LVLS)
/* CRIT_SPRG only used in critical exception handling */
#define CRIT_SPRG SPRN_SPRG2
/* MCHECK_SPRG only used in machine check exception handling */
#define MCHECK_SPRG SPRN_SPRG6W
-#define MCHECK_STACK_TOP (exception_stack_top - 4096)
-#define CRIT_STACK_TOP (exception_stack_top)
+#define MCHECK_STACK_BASE mcheckirq_ctx
+#define CRIT_STACK_BASE critirq_ctx
-/* only on e200 for now */
-#define DEBUG_STACK_TOP (exception_stack_top - 8192)
+/* only on e500mc/e200 */
+#define DEBUG_STACK_BASE dbgirq_ctx
+#ifdef CONFIG_PPC_E500MC
+#define DEBUG_SPRG SPRN_SPRG9
+#else
#define DEBUG_SPRG SPRN_SPRG6W
+#endif
+
+#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE)
#ifdef CONFIG_SMP
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
mfspr r8,SPRN_PIR; \
- mulli r8,r8,BOOKE_EXCEPTION_STACK_SIZE; \
- neg r8,r8; \
- addis r8,r8,level##_STACK_TOP@ha; \
- addi r8,r8,level##_STACK_TOP@l
+ slwi r8,r8,2; \
+ addis r8,r8,level##_STACK_BASE@ha; \
+ lwz r8,level##_STACK_BASE@l(r8); \
+ addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
#else
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
- lis r8,level##_STACK_TOP@h; \
- ori r8,r8,level##_STACK_TOP@l
+ lis r8,level##_STACK_BASE@ha; \
+ lwz r8,level##_STACK_BASE@l(r8); \
+ addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
#endif
/*
@@ -104,22 +103,36 @@
#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
mtspr exc_level##_SPRG,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
- stw r10,GPR10-INT_FRAME_SIZE(r8); \
- stw r11,GPR11-INT_FRAME_SIZE(r8); \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,exc_level_srr1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- mr r11,r8; \
- mfspr r8,exc_level##_SPRG; \
- beq 1f; \
- /* COMING FROM USER MODE */ \
+ stw r9,GPR9(r8); /* save various registers */\
+ mfcr r9; /* save CR in r9 for now */\
+ stw r10,GPR10(r8); \
+ stw r11,GPR11(r8); \
+ stw r9,_CCR(r8); /* save CR on stack */\
+ mfspr r10,exc_level_srr1; /* check whether user or kernel */\
+ andi. r10,r10,MSR_PR; \
mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\
lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,THREAD_SIZE; \
-1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
+ addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
+ beq 1f; \
+ /* COMING FROM USER MODE */ \
+ stw r9,_CCR(r11); /* save CR */\
+ lwz r10,GPR10(r8); /* copy regs from exception stack */\
+ lwz r9,GPR9(r8); \
+ stw r10,GPR10(r11); \
+ lwz r10,GPR11(r8); \
stw r9,GPR9(r11); \
+ stw r10,GPR11(r11); \
+ b 2f; \
+ /* COMING FROM PRIV MODE */ \
+1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \
+ lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \
+ stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \
+ stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \
+ lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \
+ stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \
+ mr r11,r8; \
+2: mfspr r8,exc_level##_SPRG; \
+ stw r12,GPR12(r11); /* save various registers */\
mflr r10; \
stw r10,_LINK(r11); \
mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
@@ -231,7 +244,7 @@ label:
* the code where the exception occurred (since exception entry \
* doesn't turn off DE automatically). We simulate the effect \
* of turning off DE on entry to an exception handler by turning \
- * off DE in the CSRR1 value and clearing the debug status. \
+ * off DE in the DSRR1 value and clearing the debug status. \
*/ \
mfspr r10,SPRN_DBSR; /* check single-step/branch taken */ \
andis. r10,r10,DBSR_IC@h; \
@@ -262,17 +275,17 @@ label:
lwz r12,GPR12(r11); \
mtspr DEBUG_SPRG,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \
- lwz r10,GPR10-INT_FRAME_SIZE(r8); \
- lwz r11,GPR11-INT_FRAME_SIZE(r8); \
+ lwz r10,GPR10(r8); \
+ lwz r11,GPR11(r8); \
mfspr r8,DEBUG_SPRG; \
\
RFDI; \
b .; \
\
- /* continue normal handling for a critical exception... */ \
+ /* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
@@ -315,8 +328,8 @@ label:
lwz r12,GPR12(r11); \
mtspr CRIT_SPRG,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */ \
- lwz r10,GPR10-INT_FRAME_SIZE(r8); \
- lwz r11,GPR11-INT_FRAME_SIZE(r8); \
+ lwz r10,GPR10(r8); \
+ lwz r11,GPR11(r8); \
mfspr r8,CRIT_SPRG; \
\
rfci; \
@@ -327,6 +340,14 @@ label:
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+#define DATA_STORAGE_EXCEPTION \
+ START_EXCEPTION(DataStorage) \
+ NORMAL_EXCEPTION_PROLOG; \
+ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
+ stw r5,_ESR(r11); \
+ mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
+ EXC_XFER_EE_LITE(0x0300, handle_page_fault)
+
#define INSTRUCTION_STORAGE_EXCEPTION \
START_EXCEPTION(InstructionStorage) \
NORMAL_EXCEPTION_PROLOG; \
@@ -363,8 +384,31 @@ label:
#define FP_UNAVAILABLE_EXCEPTION \
START_EXCEPTION(FloatingPointUnavailable) \
NORMAL_EXCEPTION_PROLOG; \
- bne load_up_fpu; /* if from user, just load it up */ \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+ beq 1f; \
+ bl load_up_fpu; /* if from user, just load it up */ \
+ b fast_exception_return; \
+1: addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+#ifndef __ASSEMBLY__
+struct exception_regs {
+ unsigned long mas0;
+ unsigned long mas1;
+ unsigned long mas2;
+ unsigned long mas3;
+ unsigned long mas6;
+ unsigned long mas7;
+ unsigned long srr0;
+ unsigned long srr1;
+ unsigned long csrr0;
+ unsigned long csrr1;
+ unsigned long dsrr0;
+ unsigned long dsrr1;
+ unsigned long saved_ksp_limit;
+};
+
+/* ensure this structure is always sized to a multiple of the stack alignment */
+#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16)
+
+#endif /* __ASSEMBLY__ */
#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index e581524d85bc..3cb52fa0eda3 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -39,6 +39,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
#include "head_booke.h"
/* As with the other PowerPC ports, it is expected that when code
@@ -150,16 +151,11 @@ skpinv: addi r6,r6,1 /* Increment */
/* Invalidate TLB0 */
li r6,0x04
tlbivax 0,r6
-#ifdef CONFIG_SMP
- tlbsync
-#endif
+ TLBSYNC
/* Invalidate TLB1 */
li r6,0x0c
tlbivax 0,r6
-#ifdef CONFIG_SMP
- tlbsync
-#endif
- msync
+ TLBSYNC
/* 3. Setup a temp mapping and jump to it */
andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */
@@ -237,10 +233,7 @@ skpinv: addi r6,r6,1 /* Increment */
/* Invalidate TLB1 */
li r9,0x0c
tlbivax 0,r9
-#ifdef CONFIG_SMP
- tlbsync
-#endif
- msync
+ TLBSYNC
/* 6. Setup KERNELBASE mapping in TLB1[0] */
lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
@@ -282,10 +275,7 @@ skpinv: addi r6,r6,1 /* Increment */
/* Invalidate TLB1 */
li r9,0x0c
tlbivax 0,r9
-#ifdef CONFIG_SMP
- tlbsync
-#endif
- msync
+ TLBSYNC
/* Establish the interrupt vector offsets */
SET_IVOR(0, CriticalInput);
@@ -304,7 +294,7 @@ skpinv: addi r6,r6,1 /* Increment */
SET_IVOR(13, DataTLBError);
SET_IVOR(14, InstructionTLBError);
SET_IVOR(15, DebugDebug);
-#if defined(CONFIG_E500)
+#if defined(CONFIG_E500) && !defined(CONFIG_PPC_E500MC)
SET_IVOR(15, DebugCrit);
#endif
SET_IVOR(32, SPEUnavailable);
@@ -313,6 +303,9 @@ skpinv: addi r6,r6,1 /* Increment */
#ifndef CONFIG_E200
SET_IVOR(35, PerformanceMonitor);
#endif
+#ifdef CONFIG_PPC_E500MC
+ SET_IVOR(36, Doorbell);
+#endif
/* Establish the interrupt vector base */
lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */
@@ -479,90 +472,16 @@ interrupt_base:
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- mtspr SPRN_SPRG0, r10 /* Save some working registers */
- mtspr SPRN_SPRG1, r11
- mtspr SPRN_SPRG4W, r12
- mtspr SPRN_SPRG5W, r13
- mfcr r11
- mtspr SPRN_SPRG7W, r11
-
- /*
- * Check if it was a store fault, if not then bail
- * because a user tried to access a kernel or
- * read-protected page. Otherwise, get the
- * offending address and handle it.
- */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_ST@h
- beq 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, PAGE_OFFSET@h
- cmplw 0, r10, r11
- bge 2f
-
- /* Get the PGD for the current thread */
-3:
- mfspr r11,SPRN_SPRG3
- lwz r11,PGDIR(r11)
-4:
- FIND_PTE
-
- /* Are _PAGE_USER & _PAGE_RW set & _PAGE_HWWRITE not? */
- andi. r13, r11, _PAGE_RW|_PAGE_USER|_PAGE_HWWRITE
- cmpwi 0, r13, _PAGE_RW|_PAGE_USER
- bne 2f /* Bail if not */
-
- /* Update 'changed'. */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, PTE_FLAGS_OFFSET(r12) /* Update Linux page table */
-
- /* MAS2 not updated as the entry does exist in the tlb, this
- fault taken to detect state transition (eg: COW -> DIRTY)
- */
- andi. r11, r11, _PAGE_HWEXEC
- rlwimi r11, r11, 31, 27, 27 /* SX <- _PAGE_HWEXEC */
- ori r11, r11, (MAS3_UW|MAS3_SW|MAS3_UR|MAS3_SR)@l /* set static perms */
-
- /* update search PID in MAS6, AS = 0 */
- mfspr r12, SPRN_PID0
- slwi r12, r12, 16
- mtspr SPRN_MAS6, r12
-
- /* find the TLB index that caused the fault. It has to be here. */
- tlbsx 0, r10
-
- /* only update the perm bits, assume the RPN is fine */
- mfspr r12, SPRN_MAS3
- rlwimi r12, r11, 0, 20, 31
- mtspr SPRN_MAS3,r12
- tlbwe
-
- /* Done...restore registers and get out of here. */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- rfi /* Force context change */
-
-2:
- /*
- * The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
- mfspr r11, SPRN_SPRG7R
- mtcr r11
- mfspr r13, SPRN_SPRG5R
- mfspr r12, SPRN_SPRG4R
- mfspr r11, SPRN_SPRG1
- mfspr r10, SPRN_SPRG0
- b data_access
+ NORMAL_EXCEPTION_PROLOG
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ stw r5,_ESR(r11)
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ andis. r10,r5,(ESR_ILK|ESR_DLK)@h
+ bne 1f
+ EXC_XFER_EE_LITE(0x0300, handle_page_fault)
+1:
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ EXC_XFER_EE_LITE(0x0300, CacheLockingException)
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
@@ -641,15 +560,30 @@ interrupt_base:
lwz r11,PGDIR(r11)
4:
+ /* Mask of required permission bits. Note that while we
+ * do copy ESR:ST to _PAGE_RW position as trying to write
+ * to an RO page is pretty common, we don't do it with
+ * _PAGE_DIRTY. We could do it, but it's a fairly rare
+ * event so I'd rather take the overhead when it happens
+ * rather than adding an instruction here. We should measure
+ * whether the whole thing is worth it in the first place
+ * as we could avoid loading SPRN_ESR completely in the first
+ * place...
+ *
+ * TODO: Is it worth doing that mfspr & rlwimi in the first
+ * place or can we save a couple of instructions here ?
+ */
+ mfspr r12,SPRN_ESR
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ rlwimi r13,r12,11,29,29
+
FIND_PTE
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ andc. r13,r13,r11 /* Check permission */
+ bne 2f /* Bail if permission mismatch */
#ifdef CONFIG_PTE_64BIT
lwz r13, 0(r12)
#endif
- ori r11, r11, _PAGE_ACCESSED
- stw r11, PTE_FLAGS_OFFSET(r12)
/* Jump to common tlb load */
b finish_tlb_load
@@ -663,7 +597,7 @@ interrupt_base:
mfspr r12, SPRN_SPRG4R
mfspr r11, SPRN_SPRG1
mfspr r10, SPRN_SPRG0
- b data_access
+ b DataStorage
/* Instruction TLB Error Interrupt */
/*
@@ -701,15 +635,16 @@ interrupt_base:
lwz r11,PGDIR(r11)
4:
+ /* Make up the required permissions */
+ li r13,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_HWEXEC
+
FIND_PTE
- andi. r13, r11, _PAGE_PRESENT /* Is the page present? */
- beq 2f /* Bail if not present */
+ andc. r13,r13,r11 /* Check permission */
+ bne 2f /* Bail if permission mismatch */
#ifdef CONFIG_PTE_64BIT
lwz r13, 0(r12)
#endif
- ori r11, r11, _PAGE_ACCESSED
- stw r11, PTE_FLAGS_OFFSET(r12)
/* Jump to common TLB load point */
b finish_tlb_load
@@ -750,10 +685,13 @@ interrupt_base:
/* Performance Monitor */
EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD)
+#ifdef CONFIG_PPC_E500MC
+ EXCEPTION(0x2070, Doorbell, unknown_exception, EXC_XFER_EE)
+#endif
/* Debug Interrupt */
DEBUG_DEBUG_EXCEPTION
-#if defined(CONFIG_E500)
+#if defined(CONFIG_E500) && !defined(CONFIG_PPC_E500MC)
DEBUG_CRIT_EXCEPTION
#endif
@@ -761,29 +699,13 @@ interrupt_base:
* Local functions
*/
- /*
- * Data TLB exceptions will bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-data_access:
- NORMAL_EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- andis. r10,r5,(ESR_ILK|ESR_DLK)@h
- bne 1f
- EXC_XFER_EE_LITE(0x0300, handle_page_fault)
-1:
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x0300, CacheLockingException)
-
/*
-
* Both the instruction and data TLB miss get to this
* point to load the TLB.
* r10 - EA of fault
* r11 - TLB (info from Linux PTE)
- * r12, r13 - available to use
+ * r12 - available to use
+ * r13 - upper bits of PTE (if PTE_64BIT) or available to use
* CR5 - results of addr >= PAGE_OFFSET
* MAS0, MAS1 - loaded with proper value when we get here
* MAS2, MAS3 - will need additional info from Linux PTE
@@ -805,20 +727,14 @@ finish_tlb_load:
#endif
mtspr SPRN_MAS2, r12
- bge 5, 1f
-
- /* is user addr */
- andi. r12, r11, (_PAGE_USER | _PAGE_HWWRITE | _PAGE_HWEXEC)
+ li r10, (_PAGE_HWEXEC | _PAGE_PRESENT)
+ rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */
+ and r12, r11, r10
andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
- srwi r10, r12, 1
- or r12, r12, r10 /* Copy user perms into supervisor */
- iseleq r12, 0, r12
- b 2f
-
- /* is kernel addr */
-1: rlwinm r12, r11, 31, 29, 29 /* Extract _PAGE_HWWRITE into SW */
- ori r12, r12, (MAS3_SX | MAS3_SR)
-
+ slwi r10, r12, 1
+ or r10, r10, r12
+ iseleq r12, r12, r10
+
#ifdef CONFIG_PTE_64BIT
2: rlwimi r12, r13, 24, 0, 7 /* grab RPN[32:39] */
rlwimi r12, r11, 24, 8, 19 /* grab RPN[40:51] */
@@ -1065,6 +981,52 @@ _GLOBAL(set_context)
isync /* Force context change */
blr
+_GLOBAL(flush_dcache_L1)
+ mfspr r3,SPRN_L1CFG0
+
+ rlwinm r5,r3,9,3 /* Extract cache block size */
+ twlgti r5,1 /* Only 32 and 64 byte cache blocks
+ * are currently defined.
+ */
+ li r4,32
+ subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) -
+ * log2(number of ways)
+ */
+ slw r5,r4,r5 /* r5 = cache block size */
+
+ rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */
+ mulli r7,r7,13 /* An 8-way cache will require 13
+ * loads per set.
+ */
+ slw r7,r7,r6
+
+ /* save off HID0 and set DCFA */
+ mfspr r8,SPRN_HID0
+ ori r9,r8,HID0_DCFA@l
+ mtspr SPRN_HID0,r9
+ isync
+
+ lis r4,KERNELBASE@h
+ mtctr r7
+
+1: lwz r3,0(r4) /* Load... */
+ add r4,r4,r5
+ bdnz 1b
+
+ msync
+ lis r4,KERNELBASE@h
+ mtctr r7
+
+1: dcbf 0,r4 /* ...and flush. */
+ add r4,r4,r5
+ bdnz 1b
+
+ /* restore HID0 */
+ mtspr SPRN_HID0,r8
+ isync
+
+ blr
+
/*
* We put a few things here that have to be page-aligned. This stuff
* goes at the beginning of the data segment, which is page-aligned.
@@ -1080,15 +1042,6 @@ empty_zero_page:
swapper_pg_dir:
.space PGD_TABLE_SIZE
-/* Reserved 4k for the critical exception stack & 4k for the machine
- * check stack per CPU for kernel mode exceptions */
- .section .bss
- .align 12
-exception_stack_bottom:
- .space BOOKE_EXCEPTION_STACK_SIZE * NR_CPUS
- .globl exception_stack_top
-exception_stack_top:
-
/*
* Room for two PTE pointers, usually the kernel and current user pointers
* to their respective root page table.
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
index 9971159c8040..9d42eb57aea3 100644
--- a/arch/powerpc/kernel/ibmebus.c
+++ b/arch/powerpc/kernel/ibmebus.c
@@ -53,7 +53,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */
struct bus_type ibmebus_bus_type;
/* These devices will automatically be added to the bus during init */
-static struct of_device_id __initdata builtin_matches[] = {
+static struct of_device_id __initdata ibmebus_matches[] = {
{ .compatible = "IBM,lhca" },
{ .compatible = "IBM,lhea" },
{},
@@ -82,7 +82,8 @@ static void ibmebus_free_coherent(struct device *dev,
static dma_addr_t ibmebus_map_single(struct device *dev,
void *ptr,
size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
return (dma_addr_t)(ptr);
}
@@ -90,14 +91,16 @@ static dma_addr_t ibmebus_map_single(struct device *dev,
static void ibmebus_unmap_single(struct device *dev,
dma_addr_t dma_addr,
size_t size,
- enum dma_data_direction direction)
+ enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
return;
}
static int ibmebus_map_sg(struct device *dev,
struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -112,7 +115,8 @@ static int ibmebus_map_sg(struct device *dev,
static void ibmebus_unmap_sg(struct device *dev,
struct scatterlist *sg,
- int nents, enum dma_data_direction direction)
+ int nents, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
return;
}
@@ -350,7 +354,7 @@ static int __init ibmebus_bus_init(void)
return err;
}
- err = ibmebus_create_devices(builtin_matches);
+ err = ibmebus_create_devices(ibmebus_matches);
if (err) {
device_unregister(&ibmebus_bus_device);
bus_unregister(&ibmebus_bus_type);
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index c3cf0e8f3ac1..d308a9f70f1b 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -60,7 +60,7 @@ void cpu_idle(void)
set_thread_flag(TIF_POLLING_NRFLAG);
while (1) {
- tick_nohz_stop_sched_tick();
+ tick_nohz_stop_sched_tick(1);
while (!need_resched() && !cpu_should_die()) {
ppc64_runlatch_off();
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 01bcd52bbf8e..019b02d8844f 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -153,7 +153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* address of current. R11 points to the exception frame (physical
* address). We have to preserve r10.
*/
-_GLOBAL(power_save_6xx_restore)
+_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */
stw r9,_NIP(r11) /* make it do a blr */
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S
new file mode 100644
index 000000000000..06304034b393
--- /dev/null
+++ b/arch/powerpc/kernel/idle_e500.S
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Dave Liu <daveliu@freescale.com>
+ * copy from idle_6xx.S and modify for e500 based processor,
+ * implement the power_save function in idle.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/threads.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/cputable.h>
+#include <asm/thread_info.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+_GLOBAL(e500_idle)
+ rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */
+ lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
+ ori r4,r4,_TLF_NAPPING /* so when we take an exception */
+ stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+
+ /* Check if we can nap or doze, put HID0 mask in r3 */
+ lis r3,0
+BEGIN_FTR_SECTION
+ lis r3,HID0_DOZE@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE)
+
+BEGIN_FTR_SECTION
+ /* Now check if user enabled NAP mode */
+ lis r4,powersave_nap@ha
+ lwz r4,powersave_nap@l(r4)
+ cmpwi 0,r4,0
+ beq 1f
+ stwu r1,-16(r1)
+ mflr r0
+ stw r0,20(r1)
+ bl flush_dcache_L1
+ lwz r0,20(r1)
+ addi r1,r1,16
+ mtlr r0
+ lis r3,HID0_NAP@h
+END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
+BEGIN_FTR_SECTION
+ msync
+ li r7,L2CSR0_L2FL@l
+ mtspr SPRN_L2CSR0,r7
+2:
+ mfspr r7,SPRN_L2CSR0
+ andi. r4,r7,L2CSR0_L2FL@l
+ bne 2b
+END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP)
+1:
+ /* Go to NAP or DOZE now */
+ mfspr r4,SPRN_HID0
+ rlwinm r4,r4,0,~(HID0_DOZE|HID0_NAP|HID0_SLEEP)
+ or r4,r4,r3
+ isync
+ mtspr SPRN_HID0,r4
+ isync
+
+ mfmsr r7
+ oris r7,r7,MSR_WE@h
+ ori r7,r7,MSR_EE
+ msync
+ mtmsr r7
+ isync
+2: b 2b
+
+/*
+ * Return from NAP/DOZE mode, restore some CPU specific registers,
+ * r2 containing physical address of current.
+ * r11 points to the exception frame (physical address).
+ * We have to preserve r10.
+ */
+_GLOBAL(power_save_ppc32_restore)
+ lwz r9,_LINK(r11) /* interrupted in e500_idle */
+ stw r9,_NIP(r11) /* make it do a blr */
+
+#ifdef CONFIG_SMP
+ mfspr r12,SPRN_SPRG3
+ lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ slwi r11,r11,2
+#else
+ li r11,0
+#endif
+ b transfer_to_handler_cont
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index e31aca9208eb..1882bf419fa6 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -120,7 +120,8 @@ EXPORT_SYMBOL(_outsl_ns);
#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
-void _memset_io(volatile void __iomem *addr, int c, unsigned long n)
+notrace void
+_memset_io(volatile void __iomem *addr, int c, unsigned long n)
{
void *p = (void __force *)addr;
u32 lc = c;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0c663669bc32..550a19399bfa 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -49,6 +49,8 @@ static int novmerge = 1;
static int protect4gb = 1;
+static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
+
static inline unsigned long iommu_num_pages(unsigned long vaddr,
unsigned long slen)
{
@@ -186,10 +188,12 @@ static unsigned long iommu_range_alloc(struct device *dev,
static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
void *page, unsigned int npages,
enum dma_data_direction direction,
- unsigned long mask, unsigned int align_order)
+ unsigned long mask, unsigned int align_order,
+ struct dma_attrs *attrs)
{
unsigned long entry, flags;
dma_addr_t ret = DMA_ERROR_CODE;
+ int build_fail;
spin_lock_irqsave(&(tbl->it_lock), flags);
@@ -204,9 +208,21 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */
/* Put the TCEs in the HW table */
- ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK,
- direction);
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ (unsigned long)page & IOMMU_PAGE_MASK,
+ direction, attrs);
+
+ /* ppc_md.tce_build() only returns non-zero for transient errors.
+ * Clean up the table bitmap in this case and return
+ * DMA_ERROR_CODE. For all other errors the functionality is
+ * not altered.
+ */
+ if (unlikely(build_fail)) {
+ __iommu_free(tbl, ret, npages);
+ spin_unlock_irqrestore(&(tbl->it_lock), flags);
+ return DMA_ERROR_CODE;
+ }
/* Flush/invalidate TLB caches if necessary */
if (ppc_md.tce_flush)
@@ -267,15 +283,15 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
spin_unlock_irqrestore(&(tbl->it_lock), flags);
}
-int iommu_map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, unsigned long mask,
- enum dma_data_direction direction)
+int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems,
+ unsigned long mask, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
- struct iommu_table *tbl = dev->archdata.dma_data;
dma_addr_t dma_next = 0, dma_addr;
unsigned long flags;
struct scatterlist *s, *outs, *segstart;
- int outcount, incount, i;
+ int outcount, incount, i, build_fail = 0;
unsigned int align;
unsigned long handle;
unsigned int max_seg_size;
@@ -336,7 +352,11 @@ int iommu_map_sg(struct device *dev, struct scatterlist *sglist,
npages, entry, dma_addr);
/* Insert into HW table */
- ppc_md.tce_build(tbl, entry, npages, vaddr & IOMMU_PAGE_MASK, direction);
+ build_fail = ppc_md.tce_build(tbl, entry, npages,
+ vaddr & IOMMU_PAGE_MASK,
+ direction, attrs);
+ if(unlikely(build_fail))
+ goto failure;
/* If we are in an open segment, try merging */
if (segstart != s) {
@@ -412,7 +432,8 @@ int iommu_map_sg(struct device *dev, struct scatterlist *sglist,
void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction)
+ int nelems, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
struct scatterlist *sg;
unsigned long flags;
@@ -554,7 +575,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
*/
dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
void *vaddr, size_t size, unsigned long mask,
- enum dma_data_direction direction)
+ enum dma_data_direction direction, struct dma_attrs *attrs)
{
dma_addr_t dma_handle = DMA_ERROR_CODE;
unsigned long uaddr;
@@ -572,7 +593,8 @@ dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
align = PAGE_SHIFT - IOMMU_PAGE_SHIFT;
dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
- mask >> IOMMU_PAGE_SHIFT, align);
+ mask >> IOMMU_PAGE_SHIFT, align,
+ attrs);
if (dma_handle == DMA_ERROR_CODE) {
if (printk_ratelimit()) {
printk(KERN_INFO "iommu_alloc failed, "
@@ -587,7 +609,8 @@ dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
}
void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
- size_t size, enum dma_data_direction direction)
+ size_t size, enum dma_data_direction direction,
+ struct dma_attrs *attrs)
{
unsigned int npages;
@@ -640,7 +663,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
nio_pages = size >> IOMMU_PAGE_SHIFT;
io_order = get_iommu_order(size);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
- mask >> IOMMU_PAGE_SHIFT, io_order);
+ mask >> IOMMU_PAGE_SHIFT, io_order, NULL);
if (mapping == DMA_ERROR_CODE) {
free_pages((unsigned long)ret, order);
return NULL;
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bcc249d90c4d..6ac8612da3c3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(irq_desc);
int distribute_irqs = 1;
-static inline unsigned long get_hard_enabled(void)
+static inline notrace unsigned long get_hard_enabled(void)
{
unsigned long enabled;
@@ -108,13 +108,13 @@ static inline unsigned long get_hard_enabled(void)
return enabled;
}
-static inline void set_soft_enabled(unsigned long enable)
+static inline notrace void set_soft_enabled(unsigned long enable)
{
__asm__ __volatile__("stb %0,%1(13)"
: : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
}
-void raw_local_irq_restore(unsigned long en)
+notrace void raw_local_irq_restore(unsigned long en)
{
/*
* get_paca()->soft_enabled = en;
@@ -356,9 +356,42 @@ void __init init_IRQ(void)
{
if (ppc_md.init_IRQ)
ppc_md.init_IRQ();
+
+ exc_lvl_ctx_init();
+
irq_ctx_init();
}
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+struct thread_info *critirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly;
+struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
+
+void exc_lvl_ctx_init(void)
+{
+ struct thread_info *tp;
+ int i;
+
+ for_each_possible_cpu(i) {
+ memset((void *)critirq_ctx[i], 0, THREAD_SIZE);
+ tp = critirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = 0;
+
+#ifdef CONFIG_BOOKE
+ memset((void *)dbgirq_ctx[i], 0, THREAD_SIZE);
+ tp = dbgirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = 0;
+
+ memset((void *)mcheckirq_ctx[i], 0, THREAD_SIZE);
+ tp = mcheckirq_ctx[i];
+ tp->cpu = i;
+ tp->preempt_count = HARDIRQ_OFFSET;
+#endif
+ }
+}
+#endif
#ifdef CONFIG_IRQSTACKS
struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
@@ -465,7 +498,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node,
host->revmap_type = revmap_type;
host->inval_irq = inval_irq;
host->ops = ops;
- host->of_node = of_node;
+ host->of_node = of_node_get(of_node);
if (host->ops->match == NULL)
host->ops->match = default_irq_host_match;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
new file mode 100644
index 000000000000..b4fdf2f2743c
--- /dev/null
+++ b/arch/powerpc/kernel/kgdb.c
@@ -0,0 +1,410 @@
+/*
+ * PowerPC backend to the KGDB stub.
+ *
+ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
+ * Copyright (C) 2003 Timesys Corporation.
+ * Copyright (C) 2004-2006 MontaVista Software, Inc.
+ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
+ * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
+ * Sergei Shtylyov <sshtylyov@ru.mvista.com>
+ * Copyright (C) 2007-2008 Wind River Systems, Inc.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program as licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kgdb.h>
+#include <linux/smp.h>
+#include <linux/signal.h>
+#include <linux/ptrace.h>
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/machdep.h>
+
+/*
+ * This table contains the mapping between PowerPC hardware trap types, and
+ * signals, which are primarily what GDB understands. GDB and the kernel
+ * don't always agree on values, so we use constants taken from gdb-6.2.
+ */
+static struct hard_trap_info
+{
+ unsigned int tt; /* Trap type code for powerpc */
+ unsigned char signo; /* Signal that we map this trap into */
+} hard_trap_info[] = {
+ { 0x0100, 0x02 /* SIGINT */ }, /* system reset */
+ { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */
+ { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */
+ { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */
+ { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */
+ { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */
+ { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */
+ { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
+ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
+ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
+#if defined(CONFIG_FSL_BOOKE)
+ { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
+ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */
+ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */
+ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */
+ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */
+ { 0x2060, 0x0e /* SIGALRM; NOTE(review): was labelled SIGILL (0x04) - confirm intended value */ }, /* performance monitor */
+ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */
+ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */
+ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */
+#else /* ! CONFIG_FSL_BOOKE */
+ { 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */
+ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */
+ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */
+ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
+ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
+#endif
+#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
+#if defined(CONFIG_8xx)
+ { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
+#else /* ! CONFIG_8xx */
+ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
+ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
+ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
+#if defined(CONFIG_PPC64)
+ { 0x1200, 0x05 /* SIGTRAP; NOTE(review): was labelled SIGILL (0x04) - confirm intended value */ }, /* system error */
+ { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */
+ { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */
+ { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */
+ { 0x1800, 0x04 /* SIGILL */ }, /* thermal */
+#else /* ! CONFIG_PPC64 */
+ { 0x1400, 0x02 /* SIGINT */ }, /* SMI */
+ { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */
+ { 0x1700, 0x04 /* SIGILL */ }, /* TAU */
+ { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */
+#endif
+#endif
+#endif
+ { 0x0000, 0x00 } /* Must be last */
+};
+
+static int computeSignal(unsigned int tt)
+{
+ struct hard_trap_info *ht;
+
+ for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+ if (ht->tt == tt)
+ return ht->signo;
+
+ return SIGHUP; /* default for things we don't know about */
+}
+
+static int kgdb_call_nmi_hook(struct pt_regs *regs)
+{
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ return 0;
+}
+
+#ifdef CONFIG_SMP
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ smp_send_debugger_break(MSG_ALL_BUT_SELF);
+}
+#endif
+
+/* KGDB functions to use existing PowerPC64 hooks. */
+static int kgdb_debugger(struct pt_regs *regs)
+{
+ return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
+}
+
+static int kgdb_handle_breakpoint(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(0, SIGTRAP, 0, regs) != 0)
+ return 0;
+
+ if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+ regs->nip += 4;
+
+ return 1;
+}
+
+static int kgdb_singlestep(struct pt_regs *regs)
+{
+ struct thread_info *thread_info, *exception_thread_info;
+
+ if (user_mode(regs))
+ return 0;
+
+ /*
+ * On Book E and perhaps other processors, singlestep is handled on
+ * the critical exception stack. This causes current_thread_info()
+ * to fail, since it locates the thread_info by masking off
+ * the low bits of the current stack pointer. We work around
+ * this issue by copying the thread_info from the kernel stack
+ * before calling kgdb_handle_exception, and copying it back
+ * afterwards. On most processors the copy is avoided since
+ * exception_thread_info == thread_info.
+ */
+ thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
+ exception_thread_info = current_thread_info();
+
+ if (thread_info != exception_thread_info)
+ memcpy(exception_thread_info, thread_info, sizeof *thread_info);
+
+ kgdb_handle_exception(0, SIGTRAP, 0, regs);
+
+ if (thread_info != exception_thread_info)
+ memcpy(thread_info, exception_thread_info, sizeof *thread_info);
+
+ return 1;
+}
+
+static int kgdb_iabr_match(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+ return 0;
+ return 1;
+}
+
+static int kgdb_dabr_match(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return 0;
+
+ if (kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs) != 0)
+ return 0;
+ return 1;
+}
+
+#define PACK64(ptr, src) do { *(ptr++) = (src); } while (0)
+
+#define PACK32(ptr, src) do { \
+ u32 *ptr32; \
+ ptr32 = (u32 *)ptr; \
+ *(ptr32++) = (src); \
+ ptr = (unsigned long *)ptr32; \
+ } while (0)
+
+
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ unsigned long *ptr = gdb_regs;
+ int reg;
+
+ memset(gdb_regs, 0, NUMREGBYTES);
+
+ for (reg = 0; reg < 32; reg++)
+ PACK64(ptr, regs->gpr[reg]);
+
+#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_SPE
+ for (reg = 0; reg < 32; reg++)
+ PACK64(ptr, current->thread.evr[reg]);
+#else
+ ptr += 32;
+#endif
+#else
+ /* fp registers not used by kernel, leave zero */
+ ptr += 32 * 8 / sizeof(long);
+#endif
+
+ PACK64(ptr, regs->nip);
+ PACK64(ptr, regs->msr);
+ PACK32(ptr, regs->ccr);
+ PACK64(ptr, regs->link);
+ PACK64(ptr, regs->ctr);
+ PACK32(ptr, regs->xer);
+
+ BUG_ON((unsigned long)ptr >
+ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
+}
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
+ STACK_FRAME_OVERHEAD);
+ unsigned long *ptr = gdb_regs;
+ int reg;
+
+ memset(gdb_regs, 0, NUMREGBYTES);
+
+ /* Regs GPR0-2 */
+ for (reg = 0; reg < 3; reg++)
+ PACK64(ptr, regs->gpr[reg]);
+
+ /* Regs GPR3-13 are caller saved, not in regs->gpr[] */
+ ptr += 11;
+
+ /* Regs GPR14-31 */
+ for (reg = 14; reg < 32; reg++)
+ PACK64(ptr, regs->gpr[reg]);
+
+#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_SPE
+ for (reg = 0; reg < 32; reg++)
+ PACK64(ptr, p->thread.evr[reg]);
+#else
+ ptr += 32;
+#endif
+#else
+ /* fp registers not used by kernel, leave zero */
+ ptr += 32 * 8 / sizeof(long);
+#endif
+
+ PACK64(ptr, regs->nip);
+ PACK64(ptr, regs->msr);
+ PACK32(ptr, regs->ccr);
+ PACK64(ptr, regs->link);
+ PACK64(ptr, regs->ctr);
+ PACK32(ptr, regs->xer);
+
+ BUG_ON((unsigned long)ptr >
+ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
+}
+
+#define UNPACK64(dest, ptr) do { dest = *(ptr++); } while (0)
+
+#define UNPACK32(dest, ptr) do { \
+ u32 *ptr32; \
+ ptr32 = (u32 *)ptr; \
+ dest = *(ptr32++); \
+ ptr = (unsigned long *)ptr32; \
+ } while (0)
+
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ unsigned long *ptr = gdb_regs;
+ int reg;
+#ifdef CONFIG_SPE
+ union {
+ u32 v32[2];
+ u64 v64;
+ } acc;
+#endif
+
+ for (reg = 0; reg < 32; reg++)
+ UNPACK64(regs->gpr[reg], ptr);
+
+#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_SPE
+ for (reg = 0; reg < 32; reg++)
+ UNPACK64(current->thread.evr[reg], ptr);
+#else
+ ptr += 32;
+#endif
+#else
+ /* fp registers not used by kernel, skip their 32 * 8 bytes */
+ ptr += 32 * 8 / sizeof(long);
+#endif
+
+ UNPACK64(regs->nip, ptr);
+ UNPACK64(regs->msr, ptr);
+ UNPACK32(regs->ccr, ptr);
+ UNPACK64(regs->link, ptr);
+ UNPACK64(regs->ctr, ptr);
+ UNPACK32(regs->xer, ptr);
+
+ BUG_ON((unsigned long)ptr >
+ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
+}
+
+/*
+ * This function does PowerPC-specific processing for interfacing to gdb.
+ */
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+ char *remcom_in_buffer, char *remcom_out_buffer,
+ struct pt_regs *linux_regs)
+{
+ char *ptr = &remcom_in_buffer[1];
+ unsigned long addr;
+
+ switch (remcom_in_buffer[0]) {
+ /*
+ * sAA..AA Step one instruction from AA..AA
+ * This will return an error to gdb ..
+ */
+ case 's':
+ case 'c':
+ /* handle the optional parameter */
+ if (kgdb_hex2long(&ptr, &addr))
+ linux_regs->nip = addr;
+
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+ /* set the trace bit if we're stepping */
+ if (remcom_in_buffer[0] == 's') {
+#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+ mtspr(SPRN_DBCR0,
+ mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+ linux_regs->msr |= MSR_DE;
+#else
+ linux_regs->msr |= MSR_SE;
+#endif
+ kgdb_single_step = 1;
+ if (kgdb_contthread)
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
+ }
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * Global data
+ */
+struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
+};
+
+static int kgdb_not_implemented(struct pt_regs *regs)
+{
+ return 0;
+}
+
+static void *old__debugger_ipi;
+static void *old__debugger;
+static void *old__debugger_bpt;
+static void *old__debugger_sstep;
+static void *old__debugger_iabr_match;
+static void *old__debugger_dabr_match;
+static void *old__debugger_fault_handler;
+
+int kgdb_arch_init(void)
+{
+ old__debugger_ipi = __debugger_ipi;
+ old__debugger = __debugger;
+ old__debugger_bpt = __debugger_bpt;
+ old__debugger_sstep = __debugger_sstep;
+ old__debugger_iabr_match = __debugger_iabr_match;
+ old__debugger_dabr_match = __debugger_dabr_match;
+ old__debugger_fault_handler = __debugger_fault_handler;
+
+ __debugger_ipi = kgdb_call_nmi_hook;
+ __debugger = kgdb_debugger;
+ __debugger_bpt = kgdb_handle_breakpoint;
+ __debugger_sstep = kgdb_singlestep;
+ __debugger_iabr_match = kgdb_iabr_match;
+ __debugger_dabr_match = kgdb_dabr_match;
+ __debugger_fault_handler = kgdb_not_implemented;
+
+ return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+ __debugger_ipi = old__debugger_ipi;
+ __debugger = old__debugger;
+ __debugger_bpt = old__debugger_bpt;
+ __debugger_sstep = old__debugger_sstep;
+ __debugger_iabr_match = old__debugger_iabr_match;
+ __debugger_dabr_match = old__debugger_dabr_match;
+ __debugger_fault_handler = old__debugger_fault_handler;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index c176c513566b..de79915452c8 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -34,6 +34,13 @@
#include <asm/cacheflush.h>
#include <asm/sstep.h>
#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_BOOKE
+#define MSR_SINGLESTEP (MSR_DE)
+#else
+#define MSR_SINGLESTEP (MSR_SE)
+#endif
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -53,7 +60,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
ret = -EINVAL;
}
- /* insn must be on a special executable page on ppc64 */
+ /* insn must be on a special executable page on ppc64. This is
+ * not explicitly required on ppc32 (right now), but it doesn't hurt */
if (!ret) {
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
@@ -95,7 +103,16 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
- regs->msr |= MSR_SE;
+ /* We turn off async exceptions to ensure that the single step will
+ * be for the instruction we have the kprobe on; if we don't, it's
+ * possible we'd get the single step reported for an exception handler
+ * like Decrementer or External Interrupt */
+ regs->msr &= ~MSR_EE;
+ regs->msr |= MSR_SINGLESTEP;
+#ifdef CONFIG_BOOKE
+ regs->msr &= ~MSR_CE;
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+#endif
/*
* On powerpc we should single step on the original
@@ -127,7 +144,6 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
kcb->kprobe_saved_msr = regs->msr;
}
-/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -158,7 +174,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
kprobe_opcode_t insn = *p->ainsn.insn;
if (kcb->kprobe_status == KPROBE_HIT_SS &&
is_trap(insn)) {
- regs->msr &= ~MSR_SE;
+ /* Turn off 'trace' bits */
+ regs->msr &= ~MSR_SINGLESTEP;
regs->msr |= kcb->kprobe_saved_msr;
goto no_kprobe;
}
@@ -294,8 +311,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
INIT_HLIST_HEAD(&empty_rp);
- spin_lock_irqsave(&kretprobe_lock, flags);
- head = kretprobe_inst_table_head(current);
+ kretprobe_hash_lock(current, &head, &flags);
/*
* It is possible to have multiple instances associated with a given
@@ -334,7 +350,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
regs->nip = orig_ret_address;
reset_current_kprobe();
- spin_unlock_irqrestore(&kretprobe_lock, flags);
+ kretprobe_hash_unlock(current, &flags);
preempt_enable_no_resched();
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
@@ -376,6 +392,10 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
if (!cur)
return 0;
+ /* make sure we got here for instruction we have a kprobe on */
+ if (((unsigned long)cur->ainsn.insn + 4) != regs->nip)
+ return 0;
+
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
cur->post_handler(cur, regs, 0);
@@ -395,10 +415,10 @@ out:
/*
* if somebody else is singlestepping across a probe point, msr
- * will have SE set, in which case, continue the remaining processing
+ * will have DE/SE set, in which case, continue the remaining processing
* of do_debug, as if this is not a probe hit.
*/
- if (regs->msr & MSR_SE)
+ if (regs->msr & MSR_SINGLESTEP)
return 0;
return 1;
@@ -421,7 +441,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* normal page fault.
*/
regs->nip = (unsigned long)cur->addr;
- regs->msr &= ~MSR_SE;
+ regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */
regs->msr |= kcb->kprobe_saved_msr;
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
@@ -498,7 +518,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
#ifdef CONFIG_PPC64
unsigned long arch_deref_entry_point(void *entry)
{
- return (unsigned long)(((func_descr_t *)entry)->entry);
+ return ((func_descr_t *)entry)->entry;
}
#endif
diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c
index 1e656b43ad7f..9f856a0c3e38 100644
--- a/arch/powerpc/kernel/lparcfg.c
+++ b/arch/powerpc/kernel/lparcfg.c
@@ -34,8 +34,9 @@
#include <asm/time.h>
#include <asm/prom.h>
#include <asm/vdso_datapage.h>
+#include <asm/vio.h>
-#define MODULE_VERS "1.7"
+#define MODULE_VERS "1.8"
#define MODULE_NAME "lparcfg"
/* #define LPARCFG_DEBUG */
@@ -129,32 +130,46 @@ static int iseries_lparcfg_data(struct seq_file *m, void *v)
/*
* Methods used to fetch LPAR data when running on a pSeries platform.
*/
-static void log_plpar_hcall_return(unsigned long rc, char *tag)
+/**
+ * h_get_mpp
+ * H_GET_MPP hcall returns info in 7 parms
+ */
+int h_get_mpp(struct hvcall_mpp_data *mpp_data)
{
- switch(rc) {
- case 0:
- return;
- case H_HARDWARE:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Hardware fault\n", tag);
- return;
- case H_FUNCTION:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Function not allowed\n", tag);
- return;
- case H_AUTHORITY:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Not authorized to this function\n", tag);
- return;
- case H_PARAMETER:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Bad parameter(s)\n",tag);
- return;
- default:
- printk(KERN_INFO "plpar-hcall (%s) "
- "Unexpected rc(0x%lx)\n", tag, rc);
- }
+ int rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+ rc = plpar_hcall9(H_GET_MPP, retbuf);
+
+ mpp_data->entitled_mem = retbuf[0];
+ mpp_data->mapped_mem = retbuf[1];
+
+ mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+ mpp_data->pool_num = retbuf[2] & 0xffff;
+
+ mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
+ mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
+ mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffff;
+
+ mpp_data->pool_size = retbuf[4];
+ mpp_data->loan_request = retbuf[5];
+ mpp_data->backing_mem = retbuf[6];
+
+ return rc;
}
+EXPORT_SYMBOL(h_get_mpp);
+
+struct hvcall_ppp_data {
+ u64 entitlement;
+ u64 unallocated_entitlement;
+ u16 group_num;
+ u16 pool_num;
+ u8 capped;
+ u8 weight;
+ u8 unallocated_weight;
+ u16 active_procs_in_pool;
+ u16 active_system_procs;
+};
/*
* H_GET_PPP hcall returns info in 4 parms.
@@ -176,27 +191,30 @@ static void log_plpar_hcall_return(unsigned long rc, char *tag)
* XXXX - Active processors in Physical Processor Pool.
* XXXX - Processors active on platform.
*/
-static unsigned int h_get_ppp(unsigned long *entitled,
- unsigned long *unallocated,
- unsigned long *aggregation,
- unsigned long *resource)
+static unsigned int h_get_ppp(struct hvcall_ppp_data *ppp_data)
{
unsigned long rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
rc = plpar_hcall(H_GET_PPP, retbuf);
- *entitled = retbuf[0];
- *unallocated = retbuf[1];
- *aggregation = retbuf[2];
- *resource = retbuf[3];
+ ppp_data->entitlement = retbuf[0];
+ ppp_data->unallocated_entitlement = retbuf[1];
+
+ ppp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
+ ppp_data->pool_num = retbuf[2] & 0xffff;
- log_plpar_hcall_return(rc, "H_GET_PPP");
+ ppp_data->capped = (retbuf[3] >> 6 * 8) & 0x01;
+ ppp_data->weight = (retbuf[3] >> 5 * 8) & 0xff;
+ ppp_data->unallocated_weight = (retbuf[3] >> 4 * 8) & 0xff;
+ ppp_data->active_procs_in_pool = (retbuf[3] >> 2 * 8) & 0xffff;
+ ppp_data->active_system_procs = retbuf[3] & 0xffff;
return rc;
}
-static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
+static unsigned h_pic(unsigned long *pool_idle_time,
+ unsigned long *num_procs)
{
unsigned long rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -206,8 +224,87 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
*pool_idle_time = retbuf[0];
*num_procs = retbuf[1];
- if (rc != H_AUTHORITY)
- log_plpar_hcall_return(rc, "H_PIC");
+ return rc;
+}
+
+/*
+ * parse_ppp_data
+ * Parse out the data returned from h_get_ppp and h_pic
+ */
+static void parse_ppp_data(struct seq_file *m)
+{
+ struct hvcall_ppp_data ppp_data;
+ int rc;
+
+ rc = h_get_ppp(&ppp_data);
+ if (rc)
+ return;
+
+ seq_printf(m, "partition_entitled_capacity=%ld\n",
+ ppp_data.entitlement);
+ seq_printf(m, "group=%d\n", ppp_data.group_num);
+ seq_printf(m, "system_active_processors=%d\n",
+ ppp_data.active_system_procs);
+
+ /* pool related entries are appropriate for shared configs */
+ if (lppaca[0].shared_proc) {
+ unsigned long pool_idle_time, pool_procs;
+
+ seq_printf(m, "pool=%d\n", ppp_data.pool_num);
+
+ /* report pool_capacity in percentage */
+ seq_printf(m, "pool_capacity=%d\n",
+ ppp_data.active_procs_in_pool * 100);
+
+ h_pic(&pool_idle_time, &pool_procs);
+ seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
+ seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
+ }
+
+ seq_printf(m, "unallocated_capacity_weight=%d\n",
+ ppp_data.unallocated_weight);
+ seq_printf(m, "capacity_weight=%d\n", ppp_data.weight);
+ seq_printf(m, "capped=%d\n", ppp_data.capped);
+ seq_printf(m, "unallocated_capacity=%ld\n",
+ ppp_data.unallocated_entitlement);
+}
+
+/**
+ * parse_mpp_data
+ * Parse out data returned from h_get_mpp
+ */
+static void parse_mpp_data(struct seq_file *m)
+{
+ struct hvcall_mpp_data mpp_data;
+ int rc;
+
+ rc = h_get_mpp(&mpp_data);
+ if (rc)
+ return;
+
+ seq_printf(m, "entitled_memory=%ld\n", mpp_data.entitled_mem);
+
+ if (mpp_data.mapped_mem != -1)
+ seq_printf(m, "mapped_entitled_memory=%ld\n",
+ mpp_data.mapped_mem);
+
+ seq_printf(m, "entitled_memory_group_number=%d\n", mpp_data.group_num);
+ seq_printf(m, "entitled_memory_pool_number=%d\n", mpp_data.pool_num);
+
+ seq_printf(m, "entitled_memory_weight=%d\n", mpp_data.mem_weight);
+ seq_printf(m, "unallocated_entitled_memory_weight=%d\n",
+ mpp_data.unallocated_mem_weight);
+ seq_printf(m, "unallocated_io_mapping_entitlement=%ld\n",
+ mpp_data.unallocated_entitlement);
+
+ if (mpp_data.pool_size != -1)
+ seq_printf(m, "entitled_memory_pool_size=%ld bytes\n",
+ mpp_data.pool_size);
+
+ seq_printf(m, "entitled_memory_loan_request=%ld\n",
+ mpp_data.loan_request);
+
+ seq_printf(m, "backing_memory=%ld bytes\n", mpp_data.backing_mem);
}
#define SPLPAR_CHARACTERISTICS_TOKEN 20
@@ -313,6 +410,25 @@ static int lparcfg_count_active_processors(void)
return count;
}
+static void pseries_cmo_data(struct seq_file *m)
+{
+ int cpu;
+ unsigned long cmo_faults = 0;
+ unsigned long cmo_fault_time = 0;
+
+ if (!firmware_has_feature(FW_FEATURE_CMO))
+ return;
+
+ for_each_possible_cpu(cpu) {
+ cmo_faults += lppaca[cpu].cmo_faults;
+ cmo_fault_time += lppaca[cpu].cmo_fault_time;
+ }
+
+ seq_printf(m, "cmo_faults=%lu\n", cmo_faults);
+ seq_printf(m, "cmo_fault_time_usec=%lu\n",
+ cmo_fault_time / tb_ticks_per_usec);
+}
+
static int pseries_lparcfg_data(struct seq_file *m, void *v)
{
int partition_potential_processors;
@@ -334,60 +450,13 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
partition_active_processors = lparcfg_count_active_processors();
if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- unsigned long h_entitled, h_unallocated;
- unsigned long h_aggregation, h_resource;
- unsigned long pool_idle_time, pool_procs;
- unsigned long purr;
-
- h_get_ppp(&h_entitled, &h_unallocated, &h_aggregation,
- &h_resource);
-
- seq_printf(m, "R4=0x%lx\n", h_entitled);
- seq_printf(m, "R5=0x%lx\n", h_unallocated);
- seq_printf(m, "R6=0x%lx\n", h_aggregation);
- seq_printf(m, "R7=0x%lx\n", h_resource);
-
- purr = get_purr();
-
/* this call handles the ibm,get-system-parameter contents */
parse_system_parameter_string(m);
+ parse_ppp_data(m);
+ parse_mpp_data(m);
+ pseries_cmo_data(m);
- seq_printf(m, "partition_entitled_capacity=%ld\n", h_entitled);
-
- seq_printf(m, "group=%ld\n", (h_aggregation >> 2 * 8) & 0xffff);
-
- seq_printf(m, "system_active_processors=%ld\n",
- (h_resource >> 0 * 8) & 0xffff);
-
- /* pool related entries are apropriate for shared configs */
- if (lppaca[0].shared_proc) {
-
- h_pic(&pool_idle_time, &pool_procs);
-
- seq_printf(m, "pool=%ld\n",
- (h_aggregation >> 0 * 8) & 0xffff);
-
- /* report pool_capacity in percentage */
- seq_printf(m, "pool_capacity=%ld\n",
- ((h_resource >> 2 * 8) & 0xffff) * 100);
-
- seq_printf(m, "pool_idle_time=%ld\n", pool_idle_time);
-
- seq_printf(m, "pool_num_procs=%ld\n", pool_procs);
- }
-
- seq_printf(m, "unallocated_capacity_weight=%ld\n",
- (h_resource >> 4 * 8) & 0xFF);
-
- seq_printf(m, "capacity_weight=%ld\n",
- (h_resource >> 5 * 8) & 0xFF);
-
- seq_printf(m, "capped=%ld\n", (h_resource >> 6 * 8) & 0x01);
-
- seq_printf(m, "unallocated_capacity=%ld\n", h_unallocated);
-
- seq_printf(m, "purr=%ld\n", purr);
-
+ seq_printf(m, "purr=%ld\n", get_purr());
} else { /* non SPLPAR case */
seq_printf(m, "system_active_processors=%d\n",
@@ -414,6 +483,83 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
return 0;
}
+static ssize_t update_ppp(u64 *entitlement, u8 *weight)
+{
+ struct hvcall_ppp_data ppp_data;
+ u8 new_weight;
+ u64 new_entitled;
+ ssize_t retval;
+
+ /* Get our current parameters */
+ retval = h_get_ppp(&ppp_data);
+ if (retval)
+ return retval;
+
+ if (entitlement) {
+ new_weight = ppp_data.weight;
+ new_entitled = *entitlement;
+ } else if (weight) {
+ new_weight = *weight;
+ new_entitled = ppp_data.entitlement;
+ } else
+ return -EINVAL;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+ __func__, ppp_data.entitlement, ppp_data.weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+ __func__, new_entitled, new_weight);
+
+ retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight);
+ return retval;
+}
+
+/**
+ * update_mpp
+ *
+ * Update the memory entitlement and weight for the partition. Caller must
+ * specify either a new entitlement or weight, not both, to be updated
+ * since the h_set_mpp call takes both entitlement and weight as parameters.
+ */
+static ssize_t update_mpp(u64 *entitlement, u8 *weight)
+{
+ struct hvcall_mpp_data mpp_data;
+ u64 new_entitled;
+ u8 new_weight;
+ ssize_t rc;
+
+ if (entitlement) {
+ /* Check with vio to ensure the new memory entitlement
+ * can be handled.
+ */
+ rc = vio_cmo_entitlement_update(*entitlement);
+ if (rc)
+ return rc;
+ }
+
+ rc = h_get_mpp(&mpp_data);
+ if (rc)
+ return rc;
+
+ if (entitlement) {
+ new_weight = mpp_data.mem_weight;
+ new_entitled = *entitlement;
+ } else if (weight) {
+ new_weight = *weight;
+ new_entitled = mpp_data.entitled_mem;
+ } else
+ return -EINVAL;
+
+ pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
+ __func__, mpp_data.entitled_mem, mpp_data.mem_weight);
+
+ pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
+ __func__, new_entitled, new_weight);
+
+ rc = plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight);
+ return rc;
+}
+
/*
* Interface for changing system parameters (variable capacity weight
* and entitled capacity). Format of input is "param_name=value";
@@ -427,35 +573,27 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
static ssize_t lparcfg_write(struct file *file, const char __user * buf,
size_t count, loff_t * off)
{
- char *kbuf;
+ int kbuf_sz = 64;
+ char kbuf[kbuf_sz];
char *tmp;
u64 new_entitled, *new_entitled_ptr = &new_entitled;
u8 new_weight, *new_weight_ptr = &new_weight;
-
- unsigned long current_entitled; /* parameters for h_get_ppp */
- unsigned long dummy;
- unsigned long resource;
- u8 current_weight;
-
- ssize_t retval = -ENOMEM;
+ ssize_t retval;
if (!firmware_has_feature(FW_FEATURE_SPLPAR) ||
firmware_has_feature(FW_FEATURE_ISERIES))
return -EINVAL;
- kbuf = kmalloc(count, GFP_KERNEL);
- if (!kbuf)
- goto out;
+ if (count > kbuf_sz)
+ return -EINVAL;
- retval = -EFAULT;
if (copy_from_user(kbuf, buf, count))
- goto out;
+ return -EFAULT;
- retval = -EINVAL;
kbuf[count - 1] = '\0';
tmp = strchr(kbuf, '=');
if (!tmp)
- goto out;
+ return -EINVAL;
*tmp++ = '\0';
@@ -463,34 +601,32 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
char *endp;
*new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
- goto out;
- new_weight_ptr = &current_weight;
+ return -EINVAL;
+
+ retval = update_ppp(new_entitled_ptr, NULL);
} else if (!strcmp(kbuf, "capacity_weight")) {
char *endp;
*new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
if (endp == tmp)
- goto out;
- new_entitled_ptr = &current_entitled;
- } else
- goto out;
-
- /* Get our current parameters */
- retval = h_get_ppp(&current_entitled, &dummy, &dummy, &resource);
- if (retval) {
- retval = -EIO;
- goto out;
- }
-
- current_weight = (resource >> 5 * 8) & 0xFF;
+ return -EINVAL;
- pr_debug("%s: current_entitled = %lu, current_weight = %u\n",
- __func__, current_entitled, current_weight);
+ retval = update_ppp(NULL, new_weight_ptr);
+ } else if (!strcmp(kbuf, "entitled_memory")) {
+ char *endp;
+ *new_entitled_ptr = (u64) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
- pr_debug("%s: new_entitled = %lu, new_weight = %u\n",
- __func__, *new_entitled_ptr, *new_weight_ptr);
+ retval = update_mpp(new_entitled_ptr, NULL);
+ } else if (!strcmp(kbuf, "entitled_memory_weight")) {
+ char *endp;
+ *new_weight_ptr = (u8) simple_strtoul(tmp, &endp, 10);
+ if (endp == tmp)
+ return -EINVAL;
- retval = plpar_hcall_norets(H_SET_PPP, *new_entitled_ptr,
- *new_weight_ptr);
+ retval = update_mpp(NULL, new_weight_ptr);
+ } else
+ return -EINVAL;
if (retval == H_SUCCESS || retval == H_CONSTRAINED) {
retval = count;
@@ -506,8 +642,6 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
retval = -EIO;
}
-out:
- kfree(kbuf);
return retval;
}
@@ -573,7 +707,7 @@ static int lparcfg_open(struct inode *inode, struct file *file)
return single_open(file, lparcfg_data, NULL);
}
-const struct file_operations lparcfg_fops = {
+static const struct file_operations lparcfg_fops = {
.owner = THIS_MODULE,
.read = seq_read,
.write = lparcfg_write,
@@ -581,7 +715,7 @@ const struct file_operations lparcfg_fops = {
.release = single_release,
};
-int __init lparcfg_init(void)
+static int __init lparcfg_init(void)
{
struct proc_dir_entry *ent;
mode_t mode = S_IRUSR | S_IRGRP | S_IROTH;
@@ -601,7 +735,7 @@ int __init lparcfg_init(void)
return 0;
}
-void __exit lparcfg_cleanup(void)
+static void __exit lparcfg_cleanup(void)
{
if (proc_ppc64_lparcfg)
remove_proc_entry("lparcfg", proc_ppc64_lparcfg->parent);
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 29a0e039d436..aab76887a842 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -48,7 +48,7 @@ void machine_kexec_cleanup(struct kimage *image)
* Do not allocate memory (or fail in any way) in machine_kexec().
* We are past the point of no return, committed to rebooting now.
*/
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
if (ppc_md.machine_kexec)
ppc_md.machine_kexec(image);
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 704375bda73a..a168514d8609 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -158,7 +158,7 @@ void kexec_copy_flush(struct kimage *image)
* on calling the interrupts, but we would like to call it off irq level
* so that the interrupt controller is clean.
*/
-void kexec_smp_down(void *arg)
+static void kexec_smp_down(void *arg)
{
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0, 1);
@@ -172,7 +172,7 @@ static void kexec_prepare_cpus(void)
{
int my_cpu, i, notified=-1;
- smp_call_function(kexec_smp_down, NULL, 0, /* wait */0);
+ smp_call_function(kexec_smp_down, NULL, /* wait */0);
my_cpu = get_cpu();
/* check the others cpus are now down (via paca hw cpu id == -1) */
@@ -249,7 +249,7 @@ static void kexec_prepare_cpus(void)
* We could use a smaller stack if we don't care about anything using
* current, but that audit has not been performed.
*/
-union thread_union kexec_stack
+static union thread_union kexec_stack
__attribute__((__section__(".data.init_task"))) = { };
/* Our assembly helper, in kexec_stub.S */
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 7b9160220698..85cb6f340846 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -116,3 +116,8 @@ _GLOBAL(longjmp)
mtlr r0
mr r3,r4
blr
+
+_GLOBAL(__setup_cpu_power7)
+_GLOBAL(__restore_cpu_power7)
+ /* place holder */
+ blr
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 89aaaa6f3561..6321ae36f729 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -489,7 +489,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
*
* flush_icache_range(unsigned long start, unsigned long stop)
*/
-_GLOBAL(__flush_icache_range)
+_KPROBE(__flush_icache_range)
BEGIN_FTR_SECTION
blr /* for 601, do nothing */
END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 942951e76586..4dd70cf7bb4e 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -506,6 +506,39 @@ _GLOBAL(giveup_altivec)
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+/*
+ * __giveup_vsx(tsk)
+ * Disable VSX for the task given as the argument.
+ * Does NOT save vsx registers.
+ * Enables the VSX for use in the kernel on return.
+ */
+_GLOBAL(__giveup_vsx)
+ mfmsr r5
+ oris r5,r5,MSR_VSX@h
+ mtmsrd r5 /* enable use of VSX now */
+ isync
+
+ cmpdi 0,r3,0
+ beqlr- /* if no previous owner, done */
+ addi r3,r3,THREAD /* want THREAD of task */
+ ld r5,PT_REGS(r3)
+ cmpdi 0,r5,0
+ beq 1f
+ ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lis r3,MSR_VSX@h
+ andc r4,r4,r3 /* disable VSX for previous task */
+ std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+1:
+#ifndef CONFIG_SMP
+ li r5,0
+ ld r4,last_task_used_vsx@got(r2)
+ std r5,0(r4)
+#endif /* CONFIG_SMP */
+ blr
+
+#endif /* CONFIG_VSX */
+
/* kexec_wait(phys_cpu)
*
* wait for the flag to change, indicating this kernel is going away but
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
new file mode 100644
index 000000000000..af07003573c4
--- /dev/null
+++ b/arch/powerpc/kernel/module.c
@@ -0,0 +1,116 @@
+/* Kernel module help for powerpc.
+ Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
+ Copyright (C) 2008 Freescale Semiconductor, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/moduleloader.h>
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+#include <linux/bug.h>
+#include <asm/module.h>
+#include <asm/uaccess.h>
+#include <asm/firmware.h>
+#include <linux/sort.h>
+
+#include "setup.h"
+
+LIST_HEAD(module_bug_list);
+
+void *module_alloc(unsigned long size)
+{
+ if (size == 0)
+ return NULL;
+
+ return vmalloc_exec(size);
+}
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+ vfree(module_region);
+ /* FIXME: If module_region == mod->init_region, trim exception
+ table entries. */
+}
+
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
+{
+ char *secstrings;
+ unsigned int i;
+
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (i = 1; i < hdr->e_shnum; i++)
+ if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0)
+ return &sechdrs[i];
+ return NULL;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs, struct module *me)
+{
+ const Elf_Shdr *sect;
+ int err;
+
+ err = module_bug_finalize(hdr, sechdrs, me);
+ if (err)
+ return err;
+
+ /* Apply feature fixups */
+ sect = find_section(hdr, sechdrs, "__ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(cur_cpu_spec->cpu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+#ifdef CONFIG_PPC64
+ sect = find_section(hdr, sechdrs, "__fw_ftr_fixup");
+ if (sect != NULL)
+ do_feature_fixups(powerpc_firmware_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+#endif
+
+ sect = find_section(hdr, sechdrs, "__lwsync_fixup");
+ if (sect != NULL)
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+
+ return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+ module_bug_cleanup(mod);
+}
+
+struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+ struct mod_arch_specific *mod;
+ unsigned int i;
+ struct bug_entry *bug;
+
+ list_for_each_entry(mod, &module_bug_list, bug_list) {
+ bug = mod->bug_table;
+ for (i = 0; i < mod->num_bugs; ++i, ++bug)
+ if (bugaddr == bug->bug_addr)
+ return bug;
+ }
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index eab313858315..2df91a03462a 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -34,23 +34,6 @@
#define DEBUGP(fmt , ...)
#endif
-LIST_HEAD(module_bug_list);
-
-void *module_alloc(unsigned long size)
-{
- if (size == 0)
- return NULL;
- return vmalloc(size);
-}
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
- vfree(module_region);
- /* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
-}
-
/* Count how many different relocations (different symbol, different
addend) */
static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
@@ -325,58 +308,3 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
}
return 0;
}
-
-static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- const char *name)
-{
- char *secstrings;
- unsigned int i;
-
- secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- for (i = 1; i < hdr->e_shnum; i++)
- if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0)
- return &sechdrs[i];
- return NULL;
-}
-
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
-{
- const Elf_Shdr *sect;
- int err;
-
- err = module_bug_finalize(hdr, sechdrs, me);
- if (err) /* never true, currently */
- return err;
-
- /* Apply feature fixups */
- sect = find_section(hdr, sechdrs, "__ftr_fixup");
- if (sect != NULL)
- do_feature_fixups(cur_cpu_spec->cpu_features,
- (void *)sect->sh_addr,
- (void *)sect->sh_addr + sect->sh_size);
-
- return 0;
-}
-
-void module_arch_cleanup(struct module *mod)
-{
- module_bug_cleanup(mod);
-}
-
-struct bug_entry *module_find_bug(unsigned long bugaddr)
-{
- struct mod_arch_specific *mod;
- unsigned int i;
- struct bug_entry *bug;
-
- list_for_each_entry(mod, &module_bug_list, bug_list) {
- bug = mod->bug_table;
- for (i = 0; i < mod->num_bugs; ++i, ++bug)
- if (bugaddr == bug->bug_addr)
- return bug;
- }
- return NULL;
-}
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 3a82b02b784b..ee6a2982d567 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -24,6 +24,7 @@
#include <asm/module.h>
#include <asm/uaccess.h>
#include <asm/firmware.h>
+#include <asm/code-patching.h>
#include <linux/sort.h>
#include "setup.h"
@@ -101,22 +102,6 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
return _count_relocs;
}
-void *module_alloc(unsigned long size)
-{
- if (size == 0)
- return NULL;
-
- return vmalloc_exec(size);
-}
-
-/* Free memory returned from module_alloc */
-void module_free(struct module *mod, void *module_region)
-{
- vfree(module_region);
- /* FIXME: If module_region == mod->init_region, trim exception
- table entries. */
-}
-
static int relacmp(const void *_x, const void *_y)
{
const Elf64_Rela *x, *y;
@@ -346,7 +331,7 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
restore r2. */
static int restore_r2(u32 *instruction, struct module *me)
{
- if (*instruction != 0x60000000) {
+ if (*instruction != PPC_NOP_INSTR) {
printk("%s: Expect noop after relocate, got %08x\n",
me->name, *instruction);
return 0;
@@ -466,65 +451,3 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
return 0;
}
-
-LIST_HEAD(module_bug_list);
-
-static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- const char *name)
-{
- char *secstrings;
- unsigned int i;
-
- secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- for (i = 1; i < hdr->e_shnum; i++)
- if (strcmp(secstrings+sechdrs[i].sh_name, name) == 0)
- return &sechdrs[i];
- return NULL;
-}
-
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs, struct module *me)
-{
- const Elf_Shdr *sect;
- int err;
-
- err = module_bug_finalize(hdr, sechdrs, me);
- if (err)
- return err;
-
- /* Apply feature fixups */
- sect = find_section(hdr, sechdrs, "__ftr_fixup");
- if (sect != NULL)
- do_feature_fixups(cur_cpu_spec->cpu_features,
- (void *)sect->sh_addr,
- (void *)sect->sh_addr + sect->sh_size);
-
- sect = find_section(hdr, sechdrs, "__fw_ftr_fixup");
- if (sect != NULL)
- do_feature_fixups(powerpc_firmware_features,
- (void *)sect->sh_addr,
- (void *)sect->sh_addr + sect->sh_size);
-
- return 0;
-}
-
-void module_arch_cleanup(struct module *mod)
-{
- module_bug_cleanup(mod);
-}
-
-struct bug_entry *module_find_bug(unsigned long bugaddr)
-{
- struct mod_arch_specific *mod;
- unsigned int i;
- struct bug_entry *bug;
-
- list_for_each_entry(mod, &module_bug_list, bug_list) {
- bug = mod->bug_table;
- for (i = 0; i < mod->num_bugs; ++i, ++bug)
- if (bugaddr == bug->bug_addr)
- return bug;
- }
- return NULL;
-}
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index c62d1012c013..3bb7d3dd28be 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -34,5 +34,5 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
void arch_teardown_msi_irqs(struct pci_dev *dev)
{
- return ppc_md.teardown_msi_irqs(dev);
+ ppc_md.teardown_msi_irqs(dev);
}
diff --git a/arch/powerpc/kernel/of_device.c b/arch/powerpc/kernel/of_device.c
index 5748ddb47d9f..e9be908f199b 100644
--- a/arch/powerpc/kernel/of_device.c
+++ b/arch/powerpc/kernel/of_device.c
@@ -89,54 +89,6 @@ struct of_device *of_device_alloc(struct device_node *np,
}
EXPORT_SYMBOL(of_device_alloc);
-ssize_t of_device_get_modalias(struct of_device *ofdev,
- char *str, ssize_t len)
-{
- const char *compat;
- int cplen, i;
- ssize_t tsize, csize, repend;
-
- /* Name & Type */
- csize = snprintf(str, len, "of:N%sT%s",
- ofdev->node->name, ofdev->node->type);
-
- /* Get compatible property if any */
- compat = of_get_property(ofdev->node, "compatible", &cplen);
- if (!compat)
- return csize;
-
- /* Find true end (we tolerate multiple \0 at the end */
- for (i=(cplen-1); i>=0 && !compat[i]; i--)
- cplen--;
- if (!cplen)
- return csize;
- cplen++;
-
- /* Check space (need cplen+1 chars including final \0) */
- tsize = csize + cplen;
- repend = tsize;
-
- if (csize>=len) /* @ the limit, all is already filled */
- return tsize;
-
- if (tsize>=len) { /* limit compat list */
- cplen = len-csize-1;
- repend = len;
- }
-
- /* Copy and do char replacement */
- memcpy(&str[csize+1], compat, cplen);
- for (i=csize; i<repend; i++) {
- char c = str[i];
- if (c=='\0')
- str[i] = 'C';
- else if (c==' ')
- str[i] = '_';
- }
-
- return tsize;
-}
-
int of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
{
struct of_device *ofdev;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 063cdd413049..224e9a11765c 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -598,6 +598,7 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
res->start = pci_addr;
break;
case 2: /* PCI Memory space */
+ case 3: /* PCI 64 bits Memory space */
printk(KERN_INFO
" MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
cpu_addr, cpu_addr + size - 1, pci_addr,
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
index 90e562771791..dc16aefe1dd0 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/kernel/ppc32.h
@@ -120,6 +120,7 @@ struct mcontext32 {
elf_fpregset_t mc_fregs;
unsigned int mc_pad[2];
elf_vrregset_t32 mc_vregs __attribute__((__aligned__(16)));
+ elf_vsrreghalf_t32 mc_vsregs __attribute__((__aligned__(16)));
};
struct ucontext32 {
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index d3ac631cbd26..e1ea4fe5cfbd 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -42,6 +42,7 @@
#include <asm/div64.h>
#include <asm/signal.h>
#include <asm/dcr.h>
+#include <asm/ftrace.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
@@ -67,6 +68,10 @@ EXPORT_SYMBOL(single_step_exception);
EXPORT_SYMBOL(sys_sigreturn);
#endif
+#ifdef CONFIG_FTRACE
+EXPORT_SYMBOL(_mcount);
+#endif
+
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);
@@ -102,6 +107,9 @@ EXPORT_SYMBOL(giveup_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL(giveup_altivec);
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL(giveup_vsx);
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
EXPORT_SYMBOL(giveup_spe);
#endif /* CONFIG_SPE */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7de41c3948ec..db2497ccc111 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -47,12 +47,15 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#endif
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
extern unsigned long _get_SP(void);
#ifndef CONFIG_SMP
struct task_struct *last_task_used_math = NULL;
struct task_struct *last_task_used_altivec = NULL;
+struct task_struct *last_task_used_vsx = NULL;
struct task_struct *last_task_used_spe = NULL;
#endif
@@ -104,17 +107,6 @@ void enable_kernel_fp(void)
}
EXPORT_SYMBOL(enable_kernel_fp);
-int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
-{
- if (!tsk->thread.regs)
- return 0;
- flush_fp_to_thread(current);
-
- memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs));
-
- return 1;
-}
-
#ifdef CONFIG_ALTIVEC
void enable_kernel_altivec(void)
{
@@ -148,36 +140,48 @@ void flush_altivec_to_thread(struct task_struct *tsk)
preempt_enable();
}
}
+#endif /* CONFIG_ALTIVEC */
-int dump_task_altivec(struct task_struct *tsk, elf_vrregset_t *vrregs)
+#ifdef CONFIG_VSX
+#if 0
+/* not currently used, but some crazy RAID module might want to later */
+void enable_kernel_vsx(void)
{
- /* ELF_NVRREG includes the VSCR and VRSAVE which we need to save
- * separately, see below */
- const int nregs = ELF_NVRREG - 2;
- elf_vrreg_t *reg;
- u32 *dest;
-
- if (tsk == current)
- flush_altivec_to_thread(tsk);
-
- reg = (elf_vrreg_t *)vrregs;
-
- /* copy the 32 vr registers */
- memcpy(reg, &tsk->thread.vr[0], nregs * sizeof(*reg));
- reg += nregs;
+ WARN_ON(preemptible());
- /* copy the vscr */
- memcpy(reg, &tsk->thread.vscr, sizeof(*reg));
- reg++;
+#ifdef CONFIG_SMP
+ if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
+ giveup_vsx(current);
+ else
+ giveup_vsx(NULL); /* just enable vsx for kernel - force */
+#else
+ giveup_vsx(last_task_used_vsx);
+#endif /* CONFIG_SMP */
+}
+EXPORT_SYMBOL(enable_kernel_vsx);
+#endif
- /* vrsave is stored in the high 32bit slot of the final 128bits */
- memset(reg, 0, sizeof(*reg));
- dest = (u32 *)reg;
- *dest = tsk->thread.vrsave;
+void giveup_vsx(struct task_struct *tsk)
+{
+ giveup_fpu(tsk);
+ giveup_altivec(tsk);
+ __giveup_vsx(tsk);
+}
- return 1;
+void flush_vsx_to_thread(struct task_struct *tsk)
+{
+ if (tsk->thread.regs) {
+ preempt_disable();
+ if (tsk->thread.regs->msr & MSR_VSX) {
+#ifdef CONFIG_SMP
+ BUG_ON(tsk != current);
+#endif
+ giveup_vsx(tsk);
+ }
+ preempt_enable();
+ }
}
-#endif /* CONFIG_ALTIVEC */
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
@@ -209,14 +213,6 @@ void flush_spe_to_thread(struct task_struct *tsk)
preempt_enable();
}
}
-
-int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs)
-{
- flush_spe_to_thread(current);
- /* We copy u32 evr[32] + u64 acc + u32 spefscr -> 35 */
- memcpy(evrregs, &current->thread.evr[0], sizeof(u32) * 35);
- return 1;
-}
#endif /* CONFIG_SPE */
#ifndef CONFIG_SMP
@@ -233,6 +229,10 @@ void discard_lazy_cpu_state(void)
if (last_task_used_altivec == current)
last_task_used_altivec = NULL;
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (last_task_used_vsx == current)
+ last_task_used_vsx = NULL;
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
if (last_task_used_spe == current)
last_task_used_spe = NULL;
@@ -241,6 +241,35 @@ void discard_lazy_cpu_state(void)
}
#endif /* CONFIG_SMP */
+void do_dabr(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+{
+ siginfo_t info;
+
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ 11, SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (debugger_dabr_match(regs))
+ return;
+
+ /* Clear the DAC and struct entries. One shot trigger */
+#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R | DBSR_DAC1W
+ | DBCR0_IDM));
+#endif
+
+ /* Clear the DABR */
+ set_dabr(0);
+
+ /* Deliver the signal to userspace */
+ info.si_signo = SIGTRAP;
+ info.si_errno = 0;
+ info.si_code = TRAP_HWBKPT;
+ info.si_addr = (void __user *)address;
+ force_sig_info(SIGTRAP, &info, current);
+}
+
static DEFINE_PER_CPU(unsigned long, current_dabr);
int set_dabr(unsigned long dabr)
@@ -256,6 +285,11 @@ int set_dabr(unsigned long dabr)
#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
mtspr(SPRN_DABR, dabr);
#endif
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ mtspr(SPRN_DAC1, dabr);
+#endif
+
return 0;
}
@@ -297,6 +331,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
giveup_altivec(prev);
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
+ /* VMX and FPU registers are already saved here */
+ __giveup_vsx(prev);
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/*
* If the previous thread used spe in the last quantum
@@ -317,6 +356,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
if (new->thread.regs && last_task_used_altivec == new)
new->thread.regs->msr |= MSR_VEC;
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ if (new->thread.regs && last_task_used_vsx == new)
+ new->thread.regs->msr |= MSR_VSX;
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* Avoid the trap. On smp this this never happens since
* we don't set last_task_used_spe
@@ -330,6 +373,12 @@ struct task_struct *__switch_to(struct task_struct *prev,
if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
set_dabr(new->thread.dabr);
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ /* If new thread DAC (HW breakpoint) is the same then leave it */
+ if (new->thread.dabr)
+ set_dabr(new->thread.dabr);
+#endif
+
new_thread = &new->thread;
old_thread = &current->thread;
@@ -417,6 +466,8 @@ static struct regbit {
{MSR_EE, "EE"},
{MSR_PR, "PR"},
{MSR_FP, "FP"},
+ {MSR_VEC, "VEC"},
+ {MSR_VSX, "VSX"},
{MSR_ME, "ME"},
{MSR_IR, "IR"},
{MSR_DR, "DR"},
@@ -484,10 +535,8 @@ void show_regs(struct pt_regs * regs)
* Lookup NIP late so we have the best change of getting the
* above info out without failing
*/
- printk("NIP ["REG"] ", regs->nip);
- print_symbol("%s\n", regs->nip);
- printk("LR ["REG"] ", regs->link);
- print_symbol("%s\n", regs->link);
+ printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+ printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
#endif
show_stack(current, (unsigned long *) regs->gpr[1]);
if (!user_mode(regs))
@@ -518,6 +567,10 @@ void flush_thread(void)
if (current->thread.dabr) {
current->thread.dabr = 0;
set_dabr(0);
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W);
+#endif
}
}
@@ -534,6 +587,7 @@ void prepare_to_copy(struct task_struct *tsk)
{
flush_fp_to_thread(current);
flush_altivec_to_thread(current);
+ flush_vsx_to_thread(current);
flush_spe_to_thread(current);
}
@@ -689,6 +743,9 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
#endif
discard_lazy_cpu_state();
+#ifdef CONFIG_VSX
+ current->thread.used_vsr = 0;
+#endif
memset(current->thread.fpr, 0, sizeof(current->thread.fpr));
current->thread.fpscr.val = 0;
#ifdef CONFIG_ALTIVEC
@@ -971,8 +1028,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
if (!firstframe || ip != lr) {
- printk("["REG"] ["REG"] ", sp, ip);
- print_symbol("%s", ip);
+ printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
if (firstframe)
printk(" (unreliable)");
printk("\n");
@@ -987,10 +1043,9 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
&& stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
struct pt_regs *regs = (struct pt_regs *)
(sp + STACK_FRAME_OVERHEAD);
- printk("--- Exception: %lx", regs->trap);
- print_symbol(" at %s\n", regs->nip);
lr = regs->link;
- print_symbol(" LR = %s\n", lr);
+ printk("--- Exception: %lx at %pS\n LR = %pS\n",
+ regs->trap, (void *)regs->nip, (void *)lr);
firstframe = 1;
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 2aefe2a4129a..87d83c56b31e 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -609,6 +609,10 @@ static struct feature_property {
{"altivec", 0, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
{"ibm,vmx", 1, CPU_FTR_ALTIVEC, PPC_FEATURE_HAS_ALTIVEC},
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+ /* Yes, this _really_ is ibm,vmx == 2 to enable VSX */
+ {"ibm,vmx", 2, CPU_FTR_VSX, PPC_FEATURE_HAS_VSX},
+#endif /* CONFIG_VSX */
#ifdef CONFIG_PPC64
{"ibm,dfp", 1, 0, PPC_FEATURE_HAS_DFP},
{"ibm,purr", 1, CPU_FTR_PURR, 0},
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 6d6df1e60325..c4ab2195b9cb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -620,6 +620,7 @@ static void __init early_cmdline_parse(void)
#define OV1_PPC_2_03 0x10 /* set if we support PowerPC 2.03 */
#define OV1_PPC_2_04 0x08 /* set if we support PowerPC 2.04 */
#define OV1_PPC_2_05 0x04 /* set if we support PowerPC 2.05 */
+#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */
/* Option vector 2: Open Firmware options supported */
#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */
@@ -642,6 +643,11 @@ static void __init early_cmdline_parse(void)
#else
#define OV5_MSI 0x00
#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_PPC_SMLPAR
+#define OV5_CMO 0x80 /* Cooperative Memory Overcommitment */
+#else
+#define OV5_CMO 0x00
+#endif
/*
* The architecture vector has an array of PVR mask/value pairs,
@@ -650,6 +656,8 @@ static void __init early_cmdline_parse(void)
static unsigned char ibm_architecture_vec[] = {
W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
W(0xffff0000), W(0x003e0000), /* POWER6 */
+ W(0xffff0000), W(0x003f0000), /* POWER7 */
+ W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */
5 - 1, /* 5 option vectors */
@@ -658,7 +666,7 @@ static unsigned char ibm_architecture_vec[] = {
3 - 2, /* length */
0, /* don't ignore, don't halt */
OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
- OV1_PPC_2_04 | OV1_PPC_2_05,
+ OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06,
/* option vector 2: Open Firmware options supported */
34 - 2, /* length */
@@ -684,10 +692,12 @@ static unsigned char ibm_architecture_vec[] = {
0, /* don't halt */
/* option vector 5: PAPR/OF options */
- 3 - 2, /* length */
+ 5 - 2, /* length */
0, /* don't ignore, don't halt */
OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
OV5_DONATE_DEDICATE_CPU | OV5_MSI,
+ 0,
+ OV5_CMO,
};
/* Old method - ELF header with PT_NOTE sections */
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 90eb3a3e383e..bc1fb27368af 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -128,12 +128,35 @@ static void of_bus_pci_count_cells(struct device_node *np,
*sizec = 2;
}
+static unsigned int of_bus_pci_get_flags(const u32 *addr)
+{
+ unsigned int flags = 0;
+ u32 w = addr[0];
+
+ switch((w >> 24) & 0x03) {
+ case 0x01:
+ flags |= IORESOURCE_IO;
+ break;
+ case 0x02: /* 32 bits */
+ case 0x03: /* 64 bits */
+ flags |= IORESOURCE_MEM;
+ break;
+ }
+ if (w & 0x40000000)
+ flags |= IORESOURCE_PREFETCH;
+ return flags;
+}
+
static u64 of_bus_pci_map(u32 *addr, const u32 *range, int na, int ns, int pna)
{
u64 cp, s, da;
+ unsigned int af, rf;
+
+ af = of_bus_pci_get_flags(addr);
+ rf = of_bus_pci_get_flags(range);
/* Check address type match */
- if ((addr[0] ^ range[0]) & 0x03000000)
+ if ((af ^ rf) & (IORESOURCE_MEM | IORESOURCE_IO))
return OF_BAD_ADDR;
/* Read address values, skipping high cell */
@@ -153,25 +176,6 @@ static int of_bus_pci_translate(u32 *addr, u64 offset, int na)
return of_bus_default_translate(addr + 1, offset, na - 1);
}
-static unsigned int of_bus_pci_get_flags(const u32 *addr)
-{
- unsigned int flags = 0;
- u32 w = addr[0];
-
- switch((w >> 24) & 0x03) {
- case 0x01:
- flags |= IORESOURCE_IO;
- break;
- case 0x02: /* 32 bits */
- case 0x03: /* 64 bits */
- flags |= IORESOURCE_MEM;
- break;
- }
- if (w & 0x40000000)
- flags |= IORESOURCE_PREFETCH;
- return flags;
-}
-
const u32 *of_get_pci_address(struct device_node *dev, int bar_no, u64 *size,
unsigned int *flags)
{
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 2a9fe97e4521..a5d0e78779c8 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -215,29 +215,56 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
+#ifdef CONFIG_VSX
+ double buf[33];
+ int i;
+#endif
flush_fp_to_thread(target);
+#ifdef CONFIG_VSX
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ memcpy(&buf[32], &target->thread.fpscr, sizeof(double));
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+
+#else
BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
- offsetof(struct thread_struct, fpr[32]));
+ offsetof(struct thread_struct, TS_FPR(32)));
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpr, 0, -1);
+#endif
}
static int fpr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
+#ifdef CONFIG_VSX
+ double buf[33];
+ int i;
+#endif
flush_fp_to_thread(target);
+#ifdef CONFIG_VSX
+ /* copy in to local buffer then store that into the thread struct */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ memcpy(&target->thread.fpscr, &buf[32], sizeof(double));
+ return 0;
+#else
BUILD_BUG_ON(offsetof(struct thread_struct, fpscr) !=
- offsetof(struct thread_struct, fpr[32]));
+ offsetof(struct thread_struct, TS_FPR(32)));
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpr, 0, -1);
+#endif
}
-
#ifdef CONFIG_ALTIVEC
/*
* Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
@@ -323,6 +350,56 @@ static int vr_set(struct task_struct *target, const struct user_regset *regset,
}
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+/*
+ * Currently, to set and get all the VSX state, you need to call
+ * the FP and VMX calls as well. This only gets/sets the lower 32
+ * 128bit VSX registers.
+ */
+
+static int vsr_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ flush_vsx_to_thread(target);
+ return target->thread.used_vsr ? regset->n : 0;
+}
+
+static int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ double buf[32];
+ int ret, i;
+
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = current->thread.fpr[i][TS_VSRLOWOFFSET];
+ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+
+ return ret;
+}
+
+static int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ double buf[32];
+ int ret,i;
+
+ flush_vsx_to_thread(target);
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ for (i = 0; i < 32 ; i++)
+ current->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+
+ return ret;
+}
+#endif /* CONFIG_VSX */
+
#ifdef CONFIG_SPE
/*
@@ -399,6 +476,9 @@ enum powerpc_regset {
#ifdef CONFIG_ALTIVEC
REGSET_VMX,
#endif
+#ifdef CONFIG_VSX
+ REGSET_VSX,
+#endif
#ifdef CONFIG_SPE
REGSET_SPE,
#endif
@@ -422,6 +502,13 @@ static const struct user_regset native_regsets[] = {
.active = vr_active, .get = vr_get, .set = vr_set
},
#endif
+#ifdef CONFIG_VSX
+ [REGSET_VSX] = {
+ .core_note_type = NT_PPC_VSX, .n = 32,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = vsr_active, .get = vsr_get, .set = vsr_set
+ },
+#endif
#ifdef CONFIG_SPE
[REGSET_SPE] = {
.n = 35,
@@ -616,7 +703,7 @@ void user_enable_single_step(struct task_struct *task)
if (regs != NULL) {
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- task->thread.dbcr0 = DBCR0_IDM | DBCR0_IC;
+ task->thread.dbcr0 |= DBCR0_IDM | DBCR0_IC;
regs->msr |= MSR_DE;
#else
regs->msr |= MSR_SE;
@@ -629,9 +716,16 @@ void user_disable_single_step(struct task_struct *task)
{
struct pt_regs *regs = task->thread.regs;
+
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ /* If DAC then do not single step, skip */
+ if (task->thread.dabr)
+ return;
+#endif
+
if (regs != NULL) {
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- task->thread.dbcr0 = 0;
+ task->thread.dbcr0 &= ~(DBCR0_IC | DBCR0_IDM);
regs->msr &= ~MSR_DE;
#else
regs->msr &= ~MSR_SE;
@@ -640,22 +734,75 @@ void user_disable_single_step(struct task_struct *task)
clear_tsk_thread_flag(task, TIF_SINGLESTEP);
}
-static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
unsigned long data)
{
- /* We only support one DABR and no IABRS at the moment */
+ /* For ppc64 we support one DABR and no IABRs at the moment.
+ * For embedded processors we support one DAC and no IACs at the
+ * moment.
+ */
if (addr > 0)
return -EINVAL;
- /* The bottom 3 bits are flags */
if ((data & ~0x7UL) >= TASK_SIZE)
return -EIO;
- /* Ensure translation is on */
+#ifdef CONFIG_PPC64
+
+ /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+ * It was assumed, on previous implementations, that 3 bits were
+ * passed together with the data address, fitting the design of the
+ * DABR register, as follows:
+ *
+ * bit 0: Read flag
+ * bit 1: Write flag
+ * bit 2: Breakpoint translation
+ *
+ * Thus, we use them here as so.
+ */
+
+ /* Ensure breakpoint translation bit is set */
if (data && !(data & DABR_TRANSLATION))
return -EIO;
+ /* Move contents to the DABR register */
task->thread.dabr = data;
+
+#endif
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+
+ /* As described above, it was assumed 3 bits were passed with the data
+ * address, but we will assume only the mode bits will be passed
+ * as to not cause alignment restrictions for DAC-based processors.
+ */
+
+ /* DAC's hold the whole address without any mode flags */
+ task->thread.dabr = data & ~0x3UL;
+
+ if (task->thread.dabr == 0) {
+ task->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W | DBCR0_IDM);
+ task->thread.regs->msr &= ~MSR_DE;
+ return 0;
+ }
+
+ /* Read or Write bits must be set */
+
+ if (!(data & 0x3UL))
+ return -EINVAL;
+
+ /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
+ register */
+ task->thread.dbcr0 = DBCR0_IDM;
+
+ /* Check for write and read flags and set DBCR0
+ accordingly */
+ if (data & 0x1UL)
+ task->thread.dbcr0 |= DBSR_DAC1R;
+ if (data & 0x2UL)
+ task->thread.dbcr0 |= DBSR_DAC1W;
+
+ task->thread.regs->msr |= MSR_DE;
+#endif
return 0;
}
@@ -728,7 +875,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
tmp = ptrace_get_reg(child, (int) index);
} else {
flush_fp_to_thread(child);
- tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0];
+ tmp = ((unsigned long *)child->thread.fpr)
+ [TS_FPRWIDTH * (index - PT_FPR0)];
}
ret = put_user(tmp,(unsigned long __user *) data);
break;
@@ -755,7 +903,8 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
ret = ptrace_put_reg(child, index, data);
} else {
flush_fp_to_thread(child);
- ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data;
+ ((unsigned long *)child->thread.fpr)
+ [TS_FPRWIDTH * (index - PT_FPR0)] = data;
ret = 0;
}
break;
@@ -820,6 +969,21 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
sizeof(u32)),
(const void __user *) data);
#endif
+#ifdef CONFIG_VSX
+ case PTRACE_GETVSRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, (32 * sizeof(vector128) +
+ sizeof(u32)),
+ (void __user *) data);
+
+ case PTRACE_SETVSRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, (32 * sizeof(vector128) +
+ sizeof(u32)),
+ (const void __user *) data);
+#endif
#ifdef CONFIG_SPE
case PTRACE_GETEVRREGS:
/* Get the child spe register state. */
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 4c1de6af4c09..67bf1a1e7e14 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -64,6 +64,11 @@ static long compat_ptrace_old(struct task_struct *child, long request,
return -EPERM;
}
+/* Macros to work out the correct index for the FPR in the thread struct */
+#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)
+#define FPRHALF(i) (((i) - PT_FPR0) & 1)
+#define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) + FPRHALF(i)
+
long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
@@ -122,7 +127,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* to be an array of unsigned int (32 bits) - the
* index passed in is based on this assumption.
*/
- tmp = ((unsigned int *)child->thread.fpr)[index - PT_FPR0];
+ tmp = ((unsigned int *)child->thread.fpr)
+ [FPRINDEX(index)];
}
ret = put_user((unsigned int)tmp, (u32 __user *)data);
break;
@@ -162,7 +168,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
CHECK_FULL_REGS(child->thread.regs);
if (numReg >= PT_FPR0) {
flush_fp_to_thread(child);
- tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0];
+ tmp = ((unsigned long int *)child->thread.fpr)
+ [FPRINDEX(numReg)];
} else { /* register within PT_REGS struct */
tmp = ptrace_get_reg(child, numReg);
}
@@ -217,7 +224,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
* to be an array of unsigned int (32 bits) - the
* index passed in is based on this assumption.
*/
- ((unsigned int *)child->thread.fpr)[index - PT_FPR0] = data;
+ ((unsigned int *)child->thread.fpr)
+ [FPRINDEX(index)] = data;
ret = 0;
}
break;
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index f9c6abc84a94..1be9fe38bcb5 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -160,7 +160,7 @@ static int sensors_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_sensors_show, NULL);
}
-const struct file_operations ppc_rtas_sensors_operations = {
+static const struct file_operations ppc_rtas_sensors_operations = {
.open = sensors_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -172,7 +172,7 @@ static int poweron_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_poweron_show, NULL);
}
-const struct file_operations ppc_rtas_poweron_operations = {
+static const struct file_operations ppc_rtas_poweron_operations = {
.open = poweron_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -185,7 +185,7 @@ static int progress_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_progress_show, NULL);
}
-const struct file_operations ppc_rtas_progress_operations = {
+static const struct file_operations ppc_rtas_progress_operations = {
.open = progress_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -198,7 +198,7 @@ static int clock_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_clock_show, NULL);
}
-const struct file_operations ppc_rtas_clock_operations = {
+static const struct file_operations ppc_rtas_clock_operations = {
.open = clock_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -211,7 +211,7 @@ static int tone_freq_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_freq_show, NULL);
}
-const struct file_operations ppc_rtas_tone_freq_operations = {
+static const struct file_operations ppc_rtas_tone_freq_operations = {
.open = tone_freq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -224,7 +224,7 @@ static int tone_volume_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_volume_show, NULL);
}
-const struct file_operations ppc_rtas_tone_volume_operations = {
+static const struct file_operations ppc_rtas_tone_volume_operations = {
.open = tone_volume_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -237,7 +237,7 @@ static int rmo_buf_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_rmo_buf_show, NULL);
}
-const struct file_operations ppc_rtas_rmo_buf_ops = {
+static const struct file_operations ppc_rtas_rmo_buf_ops = {
.open = rmo_buf_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 34843c318419..c680f1bbd387 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -340,8 +340,8 @@ int rtas_get_error_log_max(void)
EXPORT_SYMBOL(rtas_get_error_log_max);
-char rtas_err_buf[RTAS_ERROR_LOG_MAX];
-int rtas_last_error_token;
+static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
+static int rtas_last_error_token;
/** Return a copy of the detailed error text associated with the
* most recent failed call to rtas. Because the error text
@@ -484,7 +484,7 @@ unsigned int rtas_busy_delay(int status)
}
EXPORT_SYMBOL(rtas_busy_delay);
-int rtas_error_rc(int rtas_rc)
+static int rtas_error_rc(int rtas_rc)
{
int rc;
@@ -747,7 +747,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args)
/* Call function on all CPUs. One of us will make the
* rtas call
*/
- if (on_each_cpu(rtas_percpu_suspend_me, &data, 1, 0))
+ if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
data.error = -EINVAL;
wait_for_completion(&done);
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 0a5e22b22729..149cb112cd1a 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -286,7 +286,7 @@ static ssize_t rtas_flash_read(struct file *file, char __user *buf,
}
/* constructor for flash_block_cache */
-void rtas_block_ctor(struct kmem_cache *cache, void *ptr)
+void rtas_block_ctor(void *ptr)
{
memset(ptr, 0, RTAS_BLK_SIZE);
}
@@ -731,7 +731,7 @@ static const struct file_operations validate_flash_operations = {
.release = validate_flash_release,
};
-int __init rtas_flash_init(void)
+static int __init rtas_flash_init(void)
{
int rc;
@@ -817,7 +817,7 @@ cleanup:
return rc;
}
-void __exit rtas_flash_cleanup(void)
+static void __exit rtas_flash_cleanup(void)
{
rtas_flash_term_hook = NULL;
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 3ab88a9dc70d..589a2797eac2 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -155,12 +155,12 @@ static int rtas_pci_write_config(struct pci_bus *bus,
return PCIBIOS_DEVICE_NOT_FOUND;
}
-struct pci_ops rtas_pci_ops = {
+static struct pci_ops rtas_pci_ops = {
.read = rtas_pci_read_config,
.write = rtas_pci_write_config,
};
-int is_python(struct device_node *dev)
+static int is_python(struct device_node *dev)
{
const char *model = of_get_property(dev, "model", NULL);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index db540eab09f4..61a3f4132087 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -500,6 +500,7 @@ void __init smp_setup_cpu_sibling_map(void)
}
#endif /* CONFIG_SMP */
+#ifdef CONFIG_PCSPKR_PLATFORM
static __init int add_pcspkr(void)
{
struct device_node *np;
@@ -522,6 +523,7 @@ static __init int add_pcspkr(void)
return ret;
}
device_initcall(add_pcspkr);
+#endif /* CONFIG_PCSPKR_PLATFORM */
void probe_machine(void)
{
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 5112a4aa801d..066e65c59b58 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -43,10 +43,6 @@
#define DBG(fmt...)
-#if defined CONFIG_KGDB
-#include <asm/kgdb.h>
-#endif
-
extern void bootx_init(unsigned long r4, unsigned long phys);
int boot_cpuid;
@@ -81,7 +77,7 @@ int ucache_bsize;
* from the address that it was linked at, so we must use RELOC/PTRRELOC
* to access static data (including strings). -- paulus
*/
-unsigned long __init early_init(unsigned long dt_ptr)
+notrace unsigned long __init early_init(unsigned long dt_ptr)
{
unsigned long offset = reloc_offset();
struct cpu_spec *spec;
@@ -101,6 +97,10 @@ unsigned long __init early_init(unsigned long dt_ptr)
PTRRELOC(&__start___ftr_fixup),
PTRRELOC(&__stop___ftr_fixup));
+ do_lwsync_fixups(spec->cpu_features,
+ PTRRELOC(&__start___lwsync_fixup),
+ PTRRELOC(&__stop___lwsync_fixup));
+
return KERNELBASE + offset;
}
@@ -111,7 +111,7 @@ unsigned long __init early_init(unsigned long dt_ptr)
* This is called very early on the boot process, after a minimal
* MMU environment has been set up but before MMU_init is called.
*/
-void __init machine_init(unsigned long dt_ptr, unsigned long phys)
+notrace void __init machine_init(unsigned long dt_ptr, unsigned long phys)
{
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
@@ -127,13 +127,18 @@ void __init machine_init(unsigned long dt_ptr, unsigned long phys)
ppc_md.power_save = ppc6xx_idle;
#endif
+#ifdef CONFIG_E500
+ if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+ cpu_has_feature(CPU_FTR_CAN_NAP))
+ ppc_md.power_save = e500_idle;
+#endif
if (ppc_md.progress)
ppc_md.progress("id mach(): done", 0x200);
}
#ifdef CONFIG_BOOKE_WDT
/* Checks wdt=x and wdt_period=xx command-line option */
-int __init early_parse_wdt(char *p)
+notrace int __init early_parse_wdt(char *p)
{
if (p && strncmp(p, "0", 1) != 0)
booke_wdt_enabled = 1;
@@ -248,6 +253,28 @@ static void __init irqstack_early_init(void)
#define irqstack_early_init()
#endif
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+static void __init exc_lvl_early_init(void)
+{
+ unsigned int i;
+
+ /* interrupt stacks must be in lowmem, we get that for free on ppc32
+ * as the lmb is limited to lowmem by LMB_REAL_LIMIT */
+ for_each_possible_cpu(i) {
+ critirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+#ifdef CONFIG_BOOKE
+ dbgirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+ mcheckirq_ctx[i] = (struct thread_info *)
+ __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+#endif
+ }
+}
+#else
+#define exc_lvl_early_init()
+#endif
+
/* Warning, IO base is not yet inited */
void __init setup_arch(char **cmdline_p)
{
@@ -271,18 +298,6 @@ void __init setup_arch(char **cmdline_p)
xmon_setup();
-#if defined(CONFIG_KGDB)
- if (ppc_md.kgdb_map_scc)
- ppc_md.kgdb_map_scc();
- set_debug_traps();
- if (strstr(cmd_line, "gdb")) {
- if (ppc_md.progress)
- ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000);
- printk("kgdb breakpoint activated\n");
- breakpoint();
- }
-#endif
-
/*
* Set cache line size based on type of cpu as a default.
* Systems with OF can look in the properties on the cpu node(s)
@@ -305,6 +320,8 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
+ exc_lvl_early_init();
+
irqstack_early_init();
/* set up the bootmem stuff with available memory */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 098fd96a394a..04d8de9f0fc6 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -363,6 +363,8 @@ void __init setup_system(void)
&__start___ftr_fixup, &__stop___ftr_fixup);
do_feature_fixups(powerpc_firmware_features,
&__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ &__start___lwsync_fixup, &__stop___lwsync_fixup);
/*
* Unflatten the device-tree passed by prom_init or kexec
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index a65a44fbe523..7aada783ec6a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -120,7 +120,7 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
int ret;
int is32 = is_32bit_task();
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
+ if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK)
oldset = &current->saved_sigmask;
else if (!oldset)
oldset = &current->blocked;
@@ -131,9 +131,10 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
check_syscall_restart(regs, &ka, signr > 0);
if (signr <= 0) {
+ struct thread_info *ti = current_thread_info();
/* No signal to deliver -- put the saved sigmask back */
- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
- clear_thread_flag(TIF_RESTORE_SIGMASK);
+ if (ti->local_flags & _TLF_RESTORE_SIGMASK) {
+ ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
}
return 0; /* no signals delivered */
@@ -144,8 +145,12 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
* user space. The DABR will have been cleared if it
* triggered inside the kernel.
*/
- if (current->thread.dabr)
+ if (current->thread.dabr) {
set_dabr(current->thread.dabr);
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
+ mtspr(SPRN_DBCR0, current->thread.dbcr0);
+#endif
+ }
if (is32) {
if (ka.sa.sa_flags & SA_SIGINFO)
@@ -169,10 +174,9 @@ int do_signal(sigset_t *oldset, struct pt_regs *regs)
/*
* A signal was successfully delivered; the saved sigmask is in
- * its frame, and we can clear the TIF_RESTORE_SIGMASK flag.
+ * its frame, and we can clear the TLF_RESTORE_SIGMASK flag.
*/
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- clear_thread_flag(TIF_RESTORE_SIGMASK);
+ current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
}
return ret;
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 77efb3d5465a..28f4b9f5fe5e 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -24,6 +24,16 @@ extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
siginfo_t *info, sigset_t *oldset,
struct pt_regs *regs);
+extern unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task);
+extern unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from);
+#ifdef CONFIG_VSX
+extern unsigned long copy_vsx_to_user(void __user *to,
+ struct task_struct *task);
+extern unsigned long copy_vsx_from_user(struct task_struct *task,
+ void __user *from);
+#endif
#ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index ad6943468ee9..3e80aa32b8b0 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -68,6 +68,13 @@
#define ucontext ucontext32
/*
+ * Userspace code may pass a ucontext which doesn't include VSX added
+ * at the end. We need to check for this case.
+ */
+#define UCONTEXTSIZEWITHOUTVSX \
+ (sizeof(struct ucontext) - sizeof(elf_vsrreghalf_t32))
+
+/*
* Returning 0 means we return to userspace via
* ret_from_except and thus restore all user
* registers from *regs. This is what we need
@@ -243,7 +250,7 @@ long sys_sigsuspend(old_sigset_t mask)
current->state = TASK_INTERRUPTIBLE;
schedule();
- set_thread_flag(TIF_RESTORE_SIGMASK);
+ set_restore_sigmask();
return -ERESTARTNOHAND;
}
@@ -328,6 +335,75 @@ struct rt_sigframe {
int abigap[56];
};
+#ifdef CONFIG_VSX
+unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ double buf[ELF_NFPREG];
+ int i;
+
+ /* copy FPRs and FPSCR from the thread_struct to a local buffer, then write it to userspace */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_FPR(i);
+ memcpy(&buf[i], &task->thread.fpscr, sizeof(double));
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ double buf[ELF_NFPREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_FPR(i) = buf[i];
+ memcpy(&task->thread.fpscr, &buf[i], sizeof(double));
+
+ return 0;
+}
+
+unsigned long copy_vsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ double buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* copy the VSX low doublewords from the thread_struct to a local buffer, then write it to userspace */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_vsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ double buf[ELF_NVSRHALFREG];
+ int i;
+
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+#else
+inline unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+inline unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ return __copy_from_user(task->thread.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+#endif
+
/*
* Save the current user registers on the user stack.
* We only save the altivec/spe registers if the process has used
@@ -336,13 +412,13 @@ struct rt_sigframe {
static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
int sigret)
{
+ unsigned long msr = regs->msr;
+
/* Make sure floating point registers are stored in regs */
flush_fp_to_thread(current);
- /* save general and floating-point registers */
- if (save_general_regs(regs, frame) ||
- __copy_to_user(&frame->mc_fregs, current->thread.fpr,
- ELF_NFPREG * sizeof(double)))
+ /* save general registers */
+ if (save_general_regs(regs, frame))
return 1;
#ifdef CONFIG_ALTIVEC
@@ -354,8 +430,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
return 1;
/* set MSR_VEC in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
- if (__put_user(regs->msr | MSR_VEC, &frame->mc_gregs[PT_MSR]))
- return 1;
+ msr |= MSR_VEC;
}
/* else assert((regs->msr & MSR_VEC) == 0) */
@@ -367,7 +442,22 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
return 1;
#endif /* CONFIG_ALTIVEC */
-
+ if (copy_fpr_to_user(&frame->mc_fregs, current))
+ return 1;
+#ifdef CONFIG_VSX
+ /*
+ * Copy VSR 0-31 upper half from thread_struct to local
+ * buffer, then write that to userspace. Also set MSR_VSX in
+ * the saved MSR value to indicate that frame->mc_vsregs
+ * contains valid data
+ */
+ if (current->thread.used_vsr) {
+ __giveup_vsx(current);
+ if (copy_vsx_to_user(&frame->mc_vsregs, current))
+ return 1;
+ msr |= MSR_VSX;
+ }
+#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* save spe registers */
if (current->thread.used_spe) {
@@ -377,8 +467,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
return 1;
/* set MSR_SPE in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
- if (__put_user(regs->msr | MSR_SPE, &frame->mc_gregs[PT_MSR]))
- return 1;
+ msr |= MSR_SPE;
}
/* else assert((regs->msr & MSR_SPE) == 0) */
@@ -387,6 +476,8 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
return 1;
#endif /* CONFIG_SPE */
+ if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
+ return 1;
if (sigret) {
/* Set up the sigreturn trampoline: li r0,sigret; sc */
if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
@@ -409,6 +500,9 @@ static long restore_user_regs(struct pt_regs *regs,
long err;
unsigned int save_r2 = 0;
unsigned long msr;
+#ifdef CONFIG_VSX
+ int i;
+#endif
/*
* restore general registers but not including MSR or SOFTE. Also
@@ -436,16 +530,11 @@ static long restore_user_regs(struct pt_regs *regs,
*/
discard_lazy_cpu_state();
- /* force the process to reload the FP registers from
- current->thread when it next does FP instructions */
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
- if (__copy_from_user(current->thread.fpr, &sr->mc_fregs,
- sizeof(sr->mc_fregs)))
- return 1;
-
#ifdef CONFIG_ALTIVEC
- /* force the process to reload the altivec registers from
- current->thread when it next does altivec instructions */
+ /*
+ * Force the process to reload the altivec registers from
+ * current->thread when it next does altivec instructions
+ */
regs->msr &= ~MSR_VEC;
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
@@ -459,6 +548,31 @@ static long restore_user_regs(struct pt_regs *regs,
if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
return 1;
#endif /* CONFIG_ALTIVEC */
+ if (copy_fpr_from_user(current, &sr->mc_fregs))
+ return 1;
+
+#ifdef CONFIG_VSX
+ /*
+ * Force the process to reload the VSX registers from
+ * current->thread when it next executes a VSX instruction.
+ */
+ regs->msr &= ~MSR_VSX;
+ if (msr & MSR_VSX) {
+ /*
+ * Restore VSX registers from the stack to a local
+ * buffer, then write this out to the thread_struct
+ */
+ if (copy_vsx_from_user(current, &sr->mc_vsregs))
+ return 1;
+ } else if (current->thread.used_vsr)
+ for (i = 0; i < 32 ; i++)
+ current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+#endif /* CONFIG_VSX */
+ /*
+ * force the process to reload the FP registers from
+ * current->thread when it next does FP instructions
+ */
+ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
#ifdef CONFIG_SPE
/* force the process to reload the spe registers from
@@ -823,12 +937,42 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
{
unsigned char tmp;
+#ifdef CONFIG_PPC64
+ unsigned long new_msr = 0;
+
+ if (new_ctx &&
+ __get_user(new_msr, &new_ctx->uc_mcontext.mc_gregs[PT_MSR]))
+ return -EFAULT;
+ /*
+ * Check that the context is not smaller than the original
+ * size (with VMX but without VSX)
+ */
+ if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
+ return -EINVAL;
+ /*
+ * Reject a new context that sets the MSR VSX bit but
+ * does not provide the VSX state to go with it.
+ */
+ if ((ctx_size < sizeof(struct ucontext)) &&
+ (new_msr & MSR_VSX))
+ return -EINVAL;
+#ifdef CONFIG_VSX
+ /*
+ * If userspace doesn't provide enough room for VSX data,
+ * but current thread has used VSX, we don't have anywhere
+ * to store the full context back into.
+ */
+ if ((ctx_size < sizeof(struct ucontext)) &&
+ (current->thread.used_vsr && old_ctx))
+ return -EINVAL;
+#endif
+#else
/* Context size is for future use. Right now, we only make sure
* we are passed something we understand
*/
if (ctx_size < sizeof(struct ucontext))
return -EINVAL;
-
+#endif
if (old_ctx != NULL) {
struct mcontext __user *mctx;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index da7c058e3731..65ad925c3a8f 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -112,11 +112,29 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
#else /* CONFIG_ALTIVEC */
err |= __put_user(0, &sc->v_regs);
#endif /* CONFIG_ALTIVEC */
+ flush_fp_to_thread(current);
+ /* copy fpr regs and fpscr */
+ err |= copy_fpr_to_user(&sc->fp_regs, current);
+#ifdef CONFIG_VSX
+ /*
+ * Copy VSX low doubleword to local buffer for formatting,
+ * then out to userspace. Update v_regs to point after the
+ * VMX data.
+ */
+ if (current->thread.used_vsr) {
+ __giveup_vsx(current);
+ v_regs += ELF_NVRREG;
+ err |= copy_vsx_to_user(v_regs, current);
+ /* set MSR_VSX in the MSR value in the frame to
+ * indicate that sc->vs_reg contains valid data.
+ */
+ msr |= MSR_VSX;
+ }
+#endif /* CONFIG_VSX */
err |= __put_user(&sc->gp_regs, &sc->regs);
WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
- err |= __copy_to_user(&sc->fp_regs, &current->thread.fpr, FP_REGS_SIZE);
err |= __put_user(signr, &sc->signal);
err |= __put_user(handler, &sc->handler);
if (set != NULL)
@@ -137,29 +155,32 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
#endif
unsigned long err = 0;
unsigned long save_r13 = 0;
- elf_greg_t *gregs = (elf_greg_t *)regs;
unsigned long msr;
+#ifdef CONFIG_VSX
int i;
+#endif
/* If this is not a signal return, we preserve the TLS in r13 */
if (!sig)
save_r13 = regs->gpr[13];
- /* copy everything before MSR */
- err |= __copy_from_user(regs, &sc->gp_regs,
- PT_MSR*sizeof(unsigned long));
-
+ /* copy the GPRs */
+ err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
+ err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
/* get MSR separately, transfer the LE bit if doing signal return */
err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
if (sig)
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
-
+ err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
+ err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
+ err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
+ err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
+ err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
/* skip SOFTE */
- for (i = PT_MSR+1; i <= PT_RESULT; i++) {
- if (i == PT_SOFTE)
- continue;
- err |= __get_user(gregs[i], &sc->gp_regs[i]);
- }
+ err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]);
+ err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
+ err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
+ err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
if (!sig)
regs->gpr[13] = save_r13;
@@ -180,9 +201,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
* This has to be done before copying stuff into current->thread.fpr/vr
* for the reasons explained in the previous comment.
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC);
-
- err |= __copy_from_user(&current->thread.fpr, &sc->fp_regs, FP_REGS_SIZE);
+ regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
#ifdef CONFIG_ALTIVEC
err |= __get_user(v_regs, &sc->v_regs);
@@ -202,7 +221,23 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
else
current->thread.vrsave = 0;
#endif /* CONFIG_ALTIVEC */
+ /* restore floating point */
+ err |= copy_fpr_from_user(current, &sc->fp_regs);
+#ifdef CONFIG_VSX
+ /*
+ * Get additional VSX data. Update v_regs to point after the
+ * VMX data. Copy VSX low doubleword from userspace to local
+ * buffer for formatting, then into the taskstruct.
+ */
+ v_regs += ELF_NVRREG;
+ if ((msr & MSR_VSX) != 0)
+ err |= copy_vsx_from_user(current, v_regs);
+ else
+ for (i = 0; i < 32 ; i++)
+ current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+#else
+#endif
return err;
}
@@ -233,6 +268,13 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
}
/*
+ * Userspace code may pass a ucontext which doesn't include VSX added
+ * at the end. We need to check for this case.
+ */
+#define UCONTEXTSIZEWITHOUTVSX \
+ (sizeof(struct ucontext) - 32*sizeof(long))
+
+/*
* Handle {get,set,swap}_context operations
*/
int sys_swapcontext(struct ucontext __user *old_ctx,
@@ -241,13 +283,34 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
{
unsigned char tmp;
sigset_t set;
+ unsigned long new_msr = 0;
- /* Context size is for future use. Right now, we only make sure
- * we are passed something we understand
+ if (new_ctx &&
+ __get_user(new_msr, &new_ctx->uc_mcontext.gp_regs[PT_MSR]))
+ return -EFAULT;
+ /*
+ * Check that the context is not smaller than the original
+ * size (with VMX but without VSX)
*/
- if (ctx_size < sizeof(struct ucontext))
+ if (ctx_size < UCONTEXTSIZEWITHOUTVSX)
return -EINVAL;
-
+ /*
+ * Reject a new context that sets the MSR VSX bit but
+ * does not provide the VSX state to go with it.
+ */
+ if ((ctx_size < sizeof(struct ucontext)) &&
+ (new_msr & MSR_VSX))
+ return -EINVAL;
+#ifdef CONFIG_VSX
+ /*
+ * If userspace doesn't provide enough room for VSX data,
+ * but current thread has used VSX, we don't have anywhere
+ * to store the full context back into.
+ */
+ if ((ctx_size < sizeof(struct ucontext)) &&
+ (current->thread.used_vsr && old_ctx))
+ return -EINVAL;
+#endif
if (old_ctx != NULL) {
if (!access_ok(VERIFY_WRITE, old_ctx, sizeof(*old_ctx))
|| setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1457aa0a08f1..f5ae9fa222ea 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -72,12 +72,8 @@ struct smp_ops_t *smp_ops;
static volatile unsigned int cpu_callin_map[NR_CPUS];
-void smp_call_function_interrupt(void);
-
int smt_enabled_at_boot = 1;
-static int ipi_fail_ok;
-
static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
#ifdef CONFIG_PPC64
@@ -99,12 +95,15 @@ void smp_message_recv(int msg)
{
switch(msg) {
case PPC_MSG_CALL_FUNCTION:
- smp_call_function_interrupt();
+ generic_smp_call_function_interrupt();
break;
case PPC_MSG_RESCHEDULE:
/* XXX Do we have to do this? */
set_need_resched();
break;
+ case PPC_MSG_CALL_FUNC_SINGLE:
+ generic_smp_call_function_single_interrupt();
+ break;
case PPC_MSG_DEBUGGER_BREAK:
if (crash_ipi_function_ptr) {
crash_ipi_function_ptr(get_irq_regs());
@@ -128,6 +127,19 @@ void smp_send_reschedule(int cpu)
smp_ops->message_pass(cpu, PPC_MSG_RESCHEDULE);
}
+void arch_send_call_function_single_ipi(int cpu)
+{
+ smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+}
+
+void arch_send_call_function_ipi(cpumask_t mask)
+{
+ unsigned int cpu;
+
+ for_each_cpu_mask(cpu, mask)
+ smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
+}
+
#ifdef CONFIG_DEBUGGER
void smp_send_debugger_break(int cpu)
{
@@ -154,223 +166,13 @@ static void stop_this_cpu(void *dummy)
;
}
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- * Stolen from the i386 version.
- */
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
-
-static struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-} *call_data;
-
-/* delay of at least 8 seconds */
-#define SMP_CALL_TIMEOUT 8
-
-/*
- * These functions send a 'generic call function' IPI to other online
- * CPUS in the system.
- *
- * [SUMMARY] Run a function on other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- * <map> is a cpu map of the cpus to send IPI to.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int __smp_call_function_map(void (*func) (void *info), void *info,
- int nonatomic, int wait, cpumask_t map)
-{
- struct call_data_struct data;
- int ret = -1, num_cpus;
- int cpu;
- u64 timeout;
-
- if (unlikely(smp_ops == NULL))
- return ret;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- /* remove 'self' from the map */
- if (cpu_isset(smp_processor_id(), map))
- cpu_clear(smp_processor_id(), map);
-
- /* sanity check the map, remove any non-online processors. */
- cpus_and(map, map, cpu_online_map);
-
- num_cpus = cpus_weight(map);
- if (!num_cpus)
- goto done;
-
- call_data = &data;
- smp_wmb();
- /* Send a message to all CPUs in the map */
- for_each_cpu_mask(cpu, map)
- smp_ops->message_pass(cpu, PPC_MSG_CALL_FUNCTION);
-
- timeout = get_tb() + (u64) SMP_CALL_TIMEOUT * tb_ticks_per_sec;
-
- /* Wait for indication that they have received the message */
- while (atomic_read(&data.started) != num_cpus) {
- HMT_low();
- if (get_tb() >= timeout) {
- printk("smp_call_function on cpu %d: other cpus not "
- "responding (%d)\n", smp_processor_id(),
- atomic_read(&data.started));
- if (!ipi_fail_ok)
- debugger(NULL);
- goto out;
- }
- }
-
- /* optionally wait for the CPUs to complete */
- if (wait) {
- while (atomic_read(&data.finished) != num_cpus) {
- HMT_low();
- if (get_tb() >= timeout) {
- printk("smp_call_function on cpu %d: other "
- "cpus not finishing (%d/%d)\n",
- smp_processor_id(),
- atomic_read(&data.finished),
- atomic_read(&data.started));
- debugger(NULL);
- goto out;
- }
- }
- }
-
- done:
- ret = 0;
-
- out:
- call_data = NULL;
- HMT_medium();
- return ret;
-}
-
-static int __smp_call_function(void (*func)(void *info), void *info,
- int nonatomic, int wait)
-{
- int ret;
- spin_lock(&call_lock);
- ret =__smp_call_function_map(func, info, nonatomic, wait,
- cpu_online_map);
- spin_unlock(&call_lock);
- return ret;
-}
-
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- return __smp_call_function(func, info, nonatomic, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- cpumask_t map = CPU_MASK_NONE;
- int ret = 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- if (!cpu_online(cpu))
- return -EINVAL;
-
- cpu_set(cpu, map);
- if (cpu != get_cpu()) {
- spin_lock(&call_lock);
- ret = __smp_call_function_map(func, info, nonatomic, wait, map);
- spin_unlock(&call_lock);
- } else {
- local_irq_disable();
- func(info);
- local_irq_enable();
- }
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
-
void smp_send_stop(void)
{
- int nolock;
-
- /* It's OK to fail sending the IPI, since the alternative is to
- * be stuck forever waiting on the other CPU to take the interrupt.
- *
- * It's better to at least continue and go through reboot, since this
- * function is usually called at panic or reboot time in the first
- * place.
- */
- ipi_fail_ok = 1;
-
- /* Don't deadlock in case we got called through panic */
- nolock = !spin_trylock(&call_lock);
- __smp_call_function_map(stop_this_cpu, NULL, 1, 0, cpu_online_map);
- if (!nolock)
- spin_unlock(&call_lock);
-}
-
-void smp_call_function_interrupt(void)
-{
- void (*func) (void *info);
- void *info;
- int wait;
-
- /* call_data will be NULL if the sender timed out while
- * waiting on us to receive the call.
- */
- if (!call_data)
- return;
-
- func = call_data->func;
- info = call_data->info;
- wait = call_data->wait;
-
- if (!wait)
- smp_mb__before_atomic_inc();
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- (*func)(info);
- if (wait) {
- smp_mb__before_atomic_inc();
- atomic_inc(&call_data->finished);
- }
+ smp_call_function(stop_this_cpu, NULL, 0);
}
-extern struct gettimeofday_struct do_gtod;
-
struct thread_info *current_set[NR_CPUS];
-DECLARE_PER_CPU(unsigned int, pvr);
-
static void __devinit smp_store_cpu_info(int id)
{
per_cpu(pvr, id) = mfspr(SPRN_PVR);
@@ -596,9 +398,9 @@ int __devinit start_secondary(void *unused)
secondary_cpu_time_init();
- spin_lock(&call_lock);
+ ipi_call_lock();
cpu_set(cpu, cpu_online_map);
- spin_unlock(&call_lock);
+ ipi_call_unlock();
local_irq_enable();
diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c
index 67d6f6890edc..c906c4bf6835 100644
--- a/arch/powerpc/kernel/softemu8xx.c
+++ b/arch/powerpc/kernel/softemu8xx.c
@@ -124,7 +124,7 @@ int Soft_emulate_8xx(struct pt_regs *regs)
disp = instword & 0xffff;
ea = (u32 *)(regs->gpr[idxreg] + disp);
- ip = (u32 *)&current->thread.fpr[flreg];
+ ip = (u32 *)&current->thread.TS_FPR(flreg);
switch ( inst )
{
@@ -168,7 +168,7 @@ int Soft_emulate_8xx(struct pt_regs *regs)
break;
case FMR:
/* assume this is a fp move -- Cort */
- memcpy(ip, &current->thread.fpr[(instword>>11)&0x1f],
+ memcpy(ip, &current->thread.TS_FPR((instword>>11)&0x1f),
sizeof(double));
break;
default:
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 962944038430..f2589645870a 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -10,33 +10,35 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/module.h>
#include <linux/sched.h>
#include <linux/stacktrace.h>
+#include <linux/module.h>
#include <asm/ptrace.h>
+#include <asm/processor.h>
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
-void save_stack_trace(struct stack_trace *trace)
+static void save_context_stack(struct stack_trace *trace, unsigned long sp,
+ struct task_struct *tsk, int savesched)
{
- unsigned long sp;
-
- asm("mr %0,1" : "=r" (sp));
-
for (;;) {
unsigned long *stack = (unsigned long *) sp;
unsigned long newsp, ip;
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
return;
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = ip;
- else
- trace->skip--;
+ if (savesched || !in_sched_functions(ip)) {
+ if (!trace->skip)
+ trace->entries[trace->nr_entries++] = ip;
+ else
+ trace->skip--;
+ }
if (trace->nr_entries >= trace->max_entries)
return;
@@ -44,3 +46,19 @@ void save_stack_trace(struct stack_trace *trace)
sp = newsp;
}
}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+ unsigned long sp;
+
+ asm("mr %0,1" : "=r" (sp));
+
+ save_context_stack(trace, sp, current, 1);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+ save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index 8cee57107541..6fc6328dc626 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -7,6 +7,7 @@
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
+#include <linux/mm.h>
#include <asm/page.h>
/* References to section boundaries */
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 4fe69ca24481..c04832c4a02e 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -143,6 +143,9 @@ static inline unsigned long do_mmap2(unsigned long addr, size_t len,
struct file * file = NULL;
unsigned long ret = -EINVAL;
+ if (!arch_validate_prot(prot))
+ goto out;
+
if (shift) {
if (off & ((1 << shift) - 1))
goto out;
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c8127f832df0..800e5e9a087b 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -28,7 +28,9 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices);
/* Time in microseconds we delay before sleeping in the idle loop */
DEFINE_PER_CPU(unsigned long, smt_snooze_delay) = { 100 };
-static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
+static ssize_t store_smt_snooze_delay(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ const char *buf,
size_t count)
{
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
@@ -44,7 +46,9 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, const char *buf,
return count;
}
-static ssize_t show_smt_snooze_delay(struct sys_device *dev, char *buf)
+static ssize_t show_smt_snooze_delay(struct sys_device *dev,
+ struct sysdev_attribute *attr,
+ char *buf)
{
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
@@ -152,14 +156,17 @@ static unsigned long write_##NAME(unsigned long val) \
mtspr(ADDRESS, val); \
return 0; \
} \
-static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
+static ssize_t show_##NAME(struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
+ char *buf) \
{ \
struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
unsigned long val = run_on_cpu(cpu->sysdev.id, read_##NAME, 0); \
return sprintf(buf, "%lx\n", val); \
} \
static ssize_t __used \
- store_##NAME(struct sys_device *dev, const char *buf, size_t count) \
+ store_##NAME(struct sys_device *dev, struct sysdev_attribute *attr, \
+ const char *buf, size_t count) \
{ \
struct cpu *cpu = container_of(dev, struct cpu, sysdev); \
unsigned long val; \
@@ -522,7 +529,8 @@ static void register_nodes(void)
#endif
/* Only valid if CPU is present. */
-static ssize_t show_physical_id(struct sys_device *dev, char *buf)
+static ssize_t show_physical_id(struct sys_device *dev,
+ struct sysdev_attribute *attr, char *buf)
{
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index 368a4934f7ee..c3a56d65c5a9 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -192,7 +192,7 @@ static void tau_timeout_smp(unsigned long unused)
/* schedule ourselves to be run again */
mod_timer(&tau_timer, jiffies + shrink_timer) ;
- on_each_cpu(tau_timeout, NULL, 1, 0);
+ on_each_cpu(tau_timeout, NULL, 0);
}
/*
@@ -234,7 +234,7 @@ int __init TAU_init(void)
tau_timer.expires = jiffies + shrink_timer;
add_timer(&tau_timer);
- on_each_cpu(TAU_init_smp, NULL, 1, 0);
+ on_each_cpu(TAU_init_smp, NULL, 0);
printk("Thermal assist unit ");
#ifdef CONFIG_TAU_INT
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 73401e83739a..e2ee66b5831d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -129,7 +129,7 @@ static unsigned long __initdata iSeries_recal_titan;
static signed long __initdata iSeries_recal_tb;
/* Forward declaration is only needed for iSereis compiles */
-void __init clocksource_init(void);
+static void __init clocksource_init(void);
#endif
#define XSEC_PER_SEC (1024*1024)
@@ -150,8 +150,8 @@ u64 tb_to_xs;
unsigned tb_to_us;
#define TICKLEN_SCALE NTP_SCALE_SHIFT
-u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */
-u64 ticklen_to_xs; /* 0.64 fraction */
+static u64 last_tick_len; /* units are ns / 2^TICKLEN_SCALE */
+static u64 ticklen_to_xs; /* 0.64 fraction */
/* If last_tick_len corresponds to about 1/HZ seconds, then
last_tick_len << TICKLEN_SHIFT will be about 2^63. */
@@ -164,7 +164,7 @@ static u64 tb_to_ns_scale __read_mostly;
static unsigned tb_to_ns_shift __read_mostly;
static unsigned long boot_tb __read_mostly;
-struct gettimeofday_struct do_gtod;
+static struct gettimeofday_struct do_gtod;
extern struct timezone sys_tz;
static long timezone_offset;
@@ -322,7 +322,7 @@ void snapshot_timebases(void)
{
if (!cpu_has_feature(CPU_FTR_PURR))
return;
- on_each_cpu(snapshot_tb_and_purr, NULL, 0, 1);
+ on_each_cpu(snapshot_tb_and_purr, NULL, 1);
}
/*
@@ -742,10 +742,6 @@ void __init generic_calibrate_decr(void)
}
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
- /* Set the time base to zero */
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, 0);
-
/* Clear any pending timer interrupts */
mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
@@ -832,7 +828,7 @@ void update_vsyscall_tz(void)
++vdso_data->tb_update_count;
}
-void __init clocksource_init(void)
+static void __init clocksource_init(void)
{
struct clocksource *clock;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 4b5b7ff4f78b..81ccb8dd1a54 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -967,6 +967,20 @@ void altivec_unavailable_exception(struct pt_regs *regs)
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
}
+/*
+ * Handle a VSX unavailable exception.
+ *
+ * From kernel mode the event is logged at KERN_EMERG and die() is called
+ * with SIGABRT.  From user mode the task is sent SIGILL/ILL_ILLOPC at the
+ * faulting instruction address.
+ */
+void vsx_unavailable_exception(struct pt_regs *regs)
+{
+	if (!user_mode(regs)) {
+		printk(KERN_EMERG "Unrecoverable VSX Unavailable Exception "
+		       "%lx at %lx\n", regs->trap, regs->nip);
+		die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
+		return;
+	}
+
+	/*
+	 * A user program has executed a VSX instruction, but this
+	 * kernel doesn't support VSX.
+	 */
+	_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+
void performance_monitor_exception(struct pt_regs *regs)
{
perf_irq(regs);
@@ -1030,21 +1044,45 @@ void SoftwareEmulation(struct pt_regs *regs)
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
-void DebugException(struct pt_regs *regs, unsigned long debug_status)
+void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
{
if (debug_status & DBSR_IC) { /* instruction completion */
regs->msr &= ~MSR_DE;
+
+ /* Disable instruction completion */
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
+ /* Clear the instruction completion event */
+ mtspr(SPRN_DBSR, DBSR_IC);
+
+ if (notify_die(DIE_SSTEP, "single_step", regs, 5,
+ 5, SIGTRAP) == NOTIFY_STOP) {
+ return;
+ }
+
+ if (debugger_sstep(regs))
+ return;
+
if (user_mode(regs)) {
current->thread.dbcr0 &= ~DBCR0_IC;
+ }
+
+ _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+ } else if (debug_status & (DBSR_DAC1R | DBSR_DAC1W)) {
+ regs->msr &= ~MSR_DE;
+
+ if (user_mode(regs)) {
+ current->thread.dbcr0 &= ~(DBSR_DAC1R | DBSR_DAC1W |
+ DBCR0_IDM);
} else {
- /* Disable instruction completion */
- mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
- /* Clear the instruction completion event */
- mtspr(SPRN_DBSR, DBSR_IC);
- if (debugger_sstep(regs))
- return;
+ /* Disable DAC interrupts */
+ mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~(DBSR_DAC1R |
+ DBSR_DAC1W | DBCR0_IDM));
+
+ /* Clear the DAC event */
+ mtspr(SPRN_DBSR, (DBSR_DAC1R | DBSR_DAC1W));
}
- _exception(SIGTRAP, regs, TRAP_TRACE, 0);
+ /* Setup and send the trap to the handler */
+ do_dabr(regs, mfspr(SPRN_DAC1), debug_status);
}
}
#endif /* CONFIG_4xx || CONFIG_BOOKE */
@@ -1091,6 +1129,21 @@ void altivec_assist_exception(struct pt_regs *regs)
}
#endif /* CONFIG_ALTIVEC */
+#ifdef CONFIG_VSX
+/*
+ * Handle a VSX assist exception.
+ *
+ * In kernel mode the event is logged at KERN_EMERG and die() is called
+ * with SIGILL.  The code then calls flush_vsx_to_thread() for the current
+ * task, logs that VSX assist is not supported and raises SIGILL/ILL_ILLOPC
+ * at regs->nip.
+ *
+ * NOTE(review): there is no return after die(), so if die() returns the
+ * kernel-mode path also runs the flush/signal code below - confirm this
+ * is intended.
+ */
+void vsx_assist_exception(struct pt_regs *regs)
+{
+ if (!user_mode(regs)) {
+ printk(KERN_EMERG "VSX assist exception in kernel mode"
+ " at %lx\n", regs->nip);
+ die("Kernel VSX assist exception", regs, SIGILL);
+ }
+
+ flush_vsx_to_thread(current);
+ printk(KERN_INFO "VSX assist not supported at %lx\n", regs->nip);
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+}
+#endif /* CONFIG_VSX */
+
#ifdef CONFIG_FSL_BOOKE
void CacheLockingException(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index ce245a850db2..f177c60ea766 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -571,6 +571,11 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
if (start64)
do_feature_fixups(powerpc_firmware_features,
start64, start64 + size64);
+
+ start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64);
+ if (start64)
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ start64, start64 + size64);
#endif /* CONFIG_PPC64 */
start32 = find_section32(v32->hdr, "__ftr_fixup", &size32);
@@ -585,6 +590,11 @@ static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
start32, start32 + size32);
#endif /* CONFIG_PPC64 */
+ start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32);
+ if (start32)
+ do_lwsync_fixups(cur_cpu_spec->cpu_features,
+ start32, start32 + size32);
+
return 0;
}
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 9352ab5200e5..be3b6a41dc09 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -24,7 +24,7 @@ SECTIONS
. = ALIGN(16);
.text : {
- *(.text .stub .text.* .gnu.linkonce.t.*)
+ *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
}
PROVIDE(__etext = .);
PROVIDE(_etext = .);
@@ -33,6 +33,9 @@ SECTIONS
. = ALIGN(8);
__ftr_fixup : { *(__ftr_fixup) }
+ . = ALIGN(8);
+ __lwsync_fixup : { *(__lwsync_fixup) }
+
#ifdef CONFIG_PPC64
. = ALIGN(8);
__fw_ftr_fixup : { *(__fw_ftr_fixup) }
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 932b3fdb34b9..d0b2526dd38d 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -24,7 +24,7 @@ SECTIONS
. = ALIGN(16);
.text : {
- *(.text .stub .text.* .gnu.linkonce.t.*)
+ *(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
*(.sfpr .glink)
} :text
PROVIDE(__etext = .);
@@ -35,6 +35,9 @@ SECTIONS
__ftr_fixup : { *(__ftr_fixup) }
. = ALIGN(8);
+ __lwsync_fixup : { *(__lwsync_fixup) }
+
+ . = ALIGN(8);
__fw_ftr_fixup : { *(__fw_ftr_fixup) }
/*
@@ -43,15 +46,15 @@ SECTIONS
.rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }
+ .dynamic : { *(.dynamic) } :text :dynamic
+
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.gcc_except_table : { *(.gcc_except_table) }
+ .rela.dyn ALIGN(8) : { *(.rela.dyn) }
.opd ALIGN(8) : { KEEP (*(.opd)) }
.got ALIGN(8) : { *(.got .toc) }
- .rela.dyn ALIGN(8) : { *(.rela.dyn) }
-
- .dynamic : { *(.dynamic) } :text :dynamic
_end = .;
PROVIDE(end = .);
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index b77f8af7ddde..ade8aeaa2e70 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1,11 +1,12 @@
/*
* IBM PowerPC Virtual I/O Infrastructure Support.
*
- * Copyright (c) 2003-2005 IBM Corp.
+ * Copyright (c) 2003,2008 IBM Corp.
* Dave Engebretsen engebret@us.ibm.com
* Santiago Leon santil@us.ibm.com
* Hollis Blanchard <hollisb@us.ibm.com>
* Stephen Rothwell
+ * Robert Jennings <rcjenn@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -46,6 +47,996 @@ static struct vio_dev vio_bus_device = { /* fake "parent" device */
.dev.bus = &vio_bus_type,
};
+#ifdef CONFIG_PPC_SMLPAR
+/**
+ * struct vio_cmo_pool - A pool of IO memory for CMO use
+ *
+ * Used for the reserve and excess pools embedded in struct vio_cmo.
+ *
+ * @size: The size of the pool in bytes
+ * @free: The amount of free memory in the pool (same units as @size)
+ */
+struct vio_cmo_pool {
+ size_t size;
+ size_t free;
+};
+
+/* Delay for queued balance work; passed as-is to schedule_delayed_work(), i.e. in jiffies, not ms */
+#define VIO_CMO_BALANCE_DELAY 100
+
+/* Portion out IO memory to CMO devices by this chunk size */
+#define VIO_CMO_BALANCE_CHUNK 131072
+
+/**
+ * struct vio_cmo_dev_entry - A device that is CMO-enabled and requires
+ * entitlement
+ *
+ * @viodev: struct vio_dev pointer
+ * @list: list linkage; entries are chained on vio_cmo.device_list
+ */
+struct vio_cmo_dev_entry {
+ struct vio_dev *viodev;
+ struct list_head list;
+};
+
+/**
+ * struct vio_cmo - VIO bus accounting structure for CMO entitlement
+ *
+ * The declaration below also defines the single global instance, vio_cmo.
+ *
+ * @lock: spinlock for entire structure
+ * @balance_q: delayed work item used to run the entitlement balance;
+ *             vio_cmo_balance() recovers this structure from it
+ * @device_list: list of CMO-enabled devices requiring entitlement
+ *               (struct vio_cmo_dev_entry entries)
+ * @entitled: total system entitlement in bytes
+ * @reserve: pool of memory from which devices reserve entitlement, incl. spare
+ * @excess: pool of excess entitlement not needed for device reserves or spare
+ * @spare: IO memory for device hotplug functionality
+ * @min: minimum necessary for system operation
+ * @desired: desired memory for system operation
+ * @curr: bytes currently allocated
+ * @high: high water mark for IO data usage
+ */
+struct vio_cmo {
+ spinlock_t lock;
+ struct delayed_work balance_q;
+ struct list_head device_list;
+ size_t entitled;
+ struct vio_cmo_pool reserve;
+ struct vio_cmo_pool excess;
+ size_t spare;
+ size_t min;
+ size_t desired;
+ size_t curr;
+ size_t high;
+} vio_cmo;
+
+/**
+ * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
+ *
+ * Looks up the "vdevice" node and counts its children that carry an
+ * "ibm,my-dma-window" property.  Returns 0 when the node is not found.
+ */
+static int vio_cmo_num_OF_devs(void)
+{
+	struct device_node *vroot, *child;
+	int ndevs = 0;
+
+	vroot = of_find_node_by_name(NULL, "vdevice");
+	if (vroot) {
+		/* Only children with a DMA window property are counted */
+		for_each_child_of_node(vroot, child)
+			if (of_find_property(child, "ibm,my-dma-window",
+					     NULL))
+				ndevs++;
+	}
+	of_node_put(vroot);
+
+	return ndevs;
+}
+
+/**
+ * vio_cmo_alloc - allocate IO memory for CMO-enable devices
+ *
+ * @viodev: VIO device requesting IO memory
+ * @size: size of allocation requested
+ *
+ * Allocations come from memory reserved for the devices and any excess
+ * IO memory available to all devices. The spare pool used to service
+ * hotplug must be at least %VIO_CMO_MIN_ENT for the excess pool to be
+ * made available.
+ *
+ * The request is charged first against the device's unused entitlement
+ * (entitled - allocated); only the remainder is taken from the excess
+ * pool's free counter.  The bus-wide current usage and high water mark
+ * are updated on success.  All accounting is done under vio_cmo.lock.
+ *
+ * Return codes:
+ * 0 for successful allocation and -ENOMEM for a failure
+ */
+static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
+{
+ unsigned long flags;
+ size_t reserve_free = 0;
+ size_t excess_free = 0;
+ int ret = -ENOMEM;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Determine the amount of free entitlement available in reserve */
+ if (viodev->cmo.entitled > viodev->cmo.allocated)
+ reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
+
+ /* If spare is not fulfilled, the excess pool can not be used. */
+ if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
+ excess_free = vio_cmo.excess.free;
+
+ /* The request can be satisfied */
+ if ((reserve_free + excess_free) >= size) {
+ vio_cmo.curr += size;
+ if (vio_cmo.curr > vio_cmo.high)
+ vio_cmo.high = vio_cmo.curr;
+ viodev->cmo.allocated += size;
+ size -= min(reserve_free, size); /* what is left must come from excess */
+ vio_cmo.excess.free -= size;
+ ret = 0;
+ }
+
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return ret;
+}
+
+/**
+ * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
+ * @viodev: VIO device freeing IO memory
+ * @size: size of deallocation
+ *
+ * IO memory is freed by the device back to the correct memory pools.
+ * The spare pool is replenished first from either memory pool, then
+ * the reserve pool is used to reduce device entitlement, the excess
+ * pool is used to increase the reserve pool toward the desired entitlement
+ * target, and then the remaining memory is returned to the pools.
+ *
+ * The freed amount is split into the part that was allocated above the
+ * device's entitlement (excess_freed) and the part within it
+ * (reserve_freed).  Only the excess part is ever added back to
+ * vio_cmo.excess.free; the reserve part changes no pool counter other
+ * than possibly moving device entitlement into the spare.  Whenever
+ * entitlement is moved between pools a delayed balance is scheduled.
+ * All accounting is done under vio_cmo.lock.
+ *
+ * NOTE(review): spare_needed and the (entitled - VIO_CMO_MIN_ENT) term are
+ * unsigned subtractions; this presumably relies on vio_cmo.spare never
+ * exceeding VIO_CMO_MIN_ENT and on device entitlement never being below
+ * VIO_CMO_MIN_ENT - confirm.
+ */
+static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
+{
+ unsigned long flags;
+ size_t spare_needed = 0;
+ size_t excess_freed = 0;
+ size_t reserve_freed = size;
+ size_t tmp;
+ int balance = 0;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+ vio_cmo.curr -= size;
+
+ /* Amount of memory freed from the excess pool */
+ if (viodev->cmo.allocated > viodev->cmo.entitled) {
+ excess_freed = min(reserve_freed, (viodev->cmo.allocated -
+ viodev->cmo.entitled));
+ reserve_freed -= excess_freed;
+ }
+
+ /* Remove allocation from device */
+ viodev->cmo.allocated -= (reserve_freed + excess_freed);
+
+ /* Spare is a subset of the reserve pool, replenish it first. */
+ spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
+
+ /*
+ * Replenish the spare in the reserve pool from the excess pool.
+ * This moves entitlement into the reserve pool.
+ */
+ if (spare_needed && excess_freed) {
+ tmp = min(excess_freed, spare_needed);
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+ vio_cmo.spare += tmp;
+ excess_freed -= tmp;
+ spare_needed -= tmp;
+ balance = 1;
+ }
+
+ /*
+ * Replenish the spare in the reserve pool from the reserve pool.
+ * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
+ * if needed, and gives it to the spare pool. The amount of used
+ * memory in this pool does not change.
+ */
+ if (spare_needed && reserve_freed) {
+ tmp = min(spare_needed, min(reserve_freed,
+ (viodev->cmo.entitled -
+ VIO_CMO_MIN_ENT)));
+
+ vio_cmo.spare += tmp;
+ viodev->cmo.entitled -= tmp;
+ reserve_freed -= tmp;
+ spare_needed -= tmp;
+ balance = 1;
+ }
+
+ /*
+ * Increase the reserve pool until the desired allocation is met.
+ * Move an allocation freed from the excess pool into the reserve
+ * pool and schedule a balance operation.
+ */
+ if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
+ tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
+
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.reserve.size += tmp;
+ excess_freed -= tmp;
+ balance = 1;
+ }
+
+ /* Return memory from the excess pool to that pool */
+ if (excess_freed)
+ vio_cmo.excess.free += excess_freed;
+
+ if (balance)
+ schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_entitlement_update - Manage system entitlement changes
+ *
+ * @new_entitlement: new system entitlement to attempt to accommodate
+ *
+ * Increases in entitlement will be used to fulfill the spare entitlement
+ * and the rest is given to the excess pool. Decreases, if they are
+ * possible, come from the excess pool and from unused device entitlement
+ *
+ * A decrease is evaluated in two passes over the device list: the first
+ * pass only sums what could be given up (free excess plus each device's
+ * unallocated entitlement above VIO_CMO_MIN_ENT) and changes nothing; the
+ * second pass, run only when that sum covers the decrease, takes from the
+ * excess pool first and then from the devices.  On success a balance is
+ * scheduled with no delay.  All accounting is done under vio_cmo.lock.
+ *
+ * NOTE(review): on an increase, the portion used to top up the spare is
+ * added to vio_cmo.spare and vio_cmo.reserve.size but vio_cmo.entitled is
+ * raised only by the remainder - confirm this is intended.
+ *
+ * Returns: 0 on success, -ENOMEM when change can not be made
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement)
+{
+ struct vio_dev *viodev;
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t avail, delta, tmp;
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Entitlement increases */
+ if (new_entitlement > vio_cmo.entitled) {
+ delta = new_entitlement - vio_cmo.entitled;
+
+ /* Fulfill spare allocation */
+ if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
+ tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
+ vio_cmo.spare += tmp;
+ vio_cmo.reserve.size += tmp;
+ delta -= tmp;
+ }
+
+ /* Remaining new allocation goes to the excess pool */
+ vio_cmo.entitled += delta;
+ vio_cmo.excess.size += delta;
+ vio_cmo.excess.free += delta;
+
+ goto out;
+ }
+
+ /* Entitlement decreases */
+ delta = vio_cmo.entitled - new_entitlement;
+ avail = vio_cmo.excess.free;
+
+ /*
+ * Need to check how much unused entitlement each device can
+ * sacrifice to fulfill entitlement change.
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ if (avail >= delta)
+ break;
+
+ viodev = dev_ent->viodev;
+ if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+ (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+ avail += viodev->cmo.entitled -
+ max_t(size_t, viodev->cmo.allocated,
+ VIO_CMO_MIN_ENT);
+ }
+
+ if (delta <= avail) {
+ vio_cmo.entitled -= delta;
+
+ /* Take entitlement from the excess pool first */
+ tmp = min(vio_cmo.excess.free, delta);
+ vio_cmo.excess.size -= tmp;
+ vio_cmo.excess.free -= tmp;
+ delta -= tmp;
+
+ /*
+ * Remove all but VIO_CMO_MIN_ENT bytes from devices
+ * until entitlement change is served
+ */
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ if (!delta)
+ break;
+
+ viodev = dev_ent->viodev;
+ tmp = 0;
+ if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
+ (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
+ tmp = viodev->cmo.entitled -
+ max_t(size_t, viodev->cmo.allocated,
+ VIO_CMO_MIN_ENT);
+ viodev->cmo.entitled -= min(tmp, delta);
+ delta -= min(tmp, delta);
+ }
+ } else {
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return -ENOMEM;
+ }
+
+out:
+ schedule_delayed_work(&vio_cmo.balance_q, 0);
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+ return 0;
+}
+
+/**
+ * vio_cmo_balance - Balance entitlement among devices
+ *
+ * @work: work queue structure for this operation
+ *
+ * Any system entitlement above the minimum needed for devices, or
+ * already allocated to devices, can be distributed to the devices.
+ * The list of devices is iterated through to recalculate the desired
+ * entitlement level and to determine how much entitlement above the
+ * minimum entitlement is allocated to devices.
+ *
+ * Small chunks of the available entitlement are given to devices until
+ * their requirements are fulfilled or there is no entitlement left to give.
+ * Upon completion sizes of the reserve and excess pools are calculated.
+ *
+ * The system minimum entitlement level is also recalculated here.
+ * Entitlement will be reserved for devices even after vio_bus_remove to
+ * accommodate reloading the driver. The OF tree is walked to count the
+ * number of devices present and this will remove entitlement for devices
+ * that have actually left the system after having vio_bus_remove called.
+ *
+ * The accounting structure is reached both through the pointer recovered
+ * from @work (cmo) and through the global vio_cmo (lock and device list).
+ * The function BUGs if the recomputed minimum exceeds the total
+ * entitlement.  Before dropping the lock it cancels any pending instance
+ * of this delayed work.
+ *
+ * NOTE(review): avail is unsigned and is decremented by
+ * max(allocated, VIO_CMO_MIN_ENT) per device and by "need" in the
+ * distribution loop; this presumably relies on those sums never exceeding
+ * the remaining entitlement - confirm.
+ */
+static void vio_cmo_balance(struct work_struct *work)
+{
+ struct vio_cmo *cmo;
+ struct vio_dev *viodev;
+ struct vio_cmo_dev_entry *dev_ent;
+ unsigned long flags;
+ size_t avail = 0, level, chunk, need;
+ int devcount = 0, fulfilled;
+
+ cmo = container_of(work, struct vio_cmo, balance_q.work);
+
+ spin_lock_irqsave(&vio_cmo.lock, flags);
+
+ /* Calculate minimum entitlement and fulfill spare */
+ cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
+ BUG_ON(cmo->min > cmo->entitled);
+ cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
+ cmo->min += cmo->spare;
+ cmo->desired = cmo->min;
+
+ /*
+ * Determine how much entitlement is available and reset device
+ * entitlements
+ */
+ avail = cmo->entitled - cmo->spare;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ devcount++;
+ viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+ cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+ avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
+ }
+
+ /*
+ * Having provided each device with the minimum entitlement, loop
+ * over the devices portioning out the remaining entitlement
+ * until there is nothing left.
+ */
+ level = VIO_CMO_MIN_ENT;
+ while (avail) {
+ fulfilled = 0;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+
+ if (viodev->cmo.desired <= level) {
+ fulfilled++;
+ continue;
+ }
+
+ /*
+ * Give the device up to VIO_CMO_BALANCE_CHUNK
+ * bytes of entitlement, but do not exceed the
+ * desired level of entitlement for the device.
+ */
+ chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
+ chunk = min(chunk, (viodev->cmo.desired -
+ viodev->cmo.entitled));
+ viodev->cmo.entitled += chunk;
+
+ /*
+ * If the memory for this entitlement increase was
+ * already allocated to the device it does not come
+ * from the available pool being portioned out.
+ */
+ need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
+ max(viodev->cmo.allocated, level);
+ avail -= need;
+
+ }
+ if (fulfilled == devcount)
+ break;
+ level += VIO_CMO_BALANCE_CHUNK;
+ }
+
+ /* Calculate new reserve and excess pool sizes */
+ cmo->reserve.size = cmo->min;
+ cmo->excess.free = 0;
+ cmo->excess.size = 0;
+ need = 0;
+ list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
+ viodev = dev_ent->viodev;
+ /* Calculated reserve size above the minimum entitlement */
+ if (viodev->cmo.entitled)
+ cmo->reserve.size += (viodev->cmo.entitled -
+ VIO_CMO_MIN_ENT);
+ /* Calculated used excess entitlement */
+ if (viodev->cmo.allocated > viodev->cmo.entitled)
+ need += viodev->cmo.allocated - viodev->cmo.entitled;
+ }
+ cmo->excess.size = cmo->entitled - cmo->reserve.size;
+ cmo->excess.free = cmo->excess.size - need;
+
+ cancel_delayed_work(container_of(work, struct delayed_work, work));
+ spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/*
+ * CMO-aware coherent allocation: charge the size, rounded up to
+ * IOMMU_PAGE_SIZE, against the device's entitlement, then hand the
+ * request to dma_iommu_ops.  The charge is released if the underlying
+ * allocation fails.  Either failure bumps the device's allocs_failed
+ * counter.
+ */
+static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
+					  dma_addr_t *dma_handle, gfp_t flag)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	size_t io_size = roundup(size, IOMMU_PAGE_SIZE);
+	void *buf = NULL;
+
+	if (vio_cmo_alloc(viodev, io_size) == 0) {
+		buf = dma_iommu_ops.alloc_coherent(dev, size, dma_handle,
+						   flag);
+		if (likely(buf != NULL))
+			return buf;
+
+		/* Underlying allocation failed: give the entitlement back */
+		vio_cmo_dealloc(viodev, io_size);
+	}
+
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return buf;
+}
+
+/*
+ * CMO-aware coherent free: release the buffer through dma_iommu_ops and
+ * then return the size, rounded up to IOMMU_PAGE_SIZE, to the CMO
+ * accounting.
+ */
+static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
+					void *vaddr, dma_addr_t dma_handle)
+{
+	dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);
+	vio_cmo_dealloc(to_vio_dev(dev), roundup(size, IOMMU_PAGE_SIZE));
+}
+
+/*
+ * CMO-aware single mapping: charge the size, rounded up to
+ * IOMMU_PAGE_SIZE, against the device's entitlement before mapping through
+ * dma_iommu_ops.  Returns DMA_ERROR_CODE when the entitlement is not
+ * available; when the underlying mapping fails the charge is released and
+ * the failing handle is returned.  Either failure bumps allocs_failed.
+ */
+static dma_addr_t vio_dma_iommu_map_single(struct device *dev, void *vaddr,
+					   size_t size,
+					   enum dma_data_direction direction,
+					   struct dma_attrs *attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	size_t io_size = roundup(size, IOMMU_PAGE_SIZE);
+	dma_addr_t handle;
+
+	if (vio_cmo_alloc(viodev, io_size)) {
+		atomic_inc(&viodev->cmo.allocs_failed);
+		return DMA_ERROR_CODE;
+	}
+
+	handle = dma_iommu_ops.map_single(dev, vaddr, size, direction, attrs);
+	if (likely(!dma_mapping_error(handle)))
+		return handle;
+
+	/* Mapping failed: give the entitlement back */
+	vio_cmo_dealloc(viodev, io_size);
+	atomic_inc(&viodev->cmo.allocs_failed);
+	return handle;
+}
+
+/*
+ * CMO-aware single unmap: tear the mapping down through dma_iommu_ops and
+ * then return the size, rounded up to IOMMU_PAGE_SIZE, to the CMO
+ * accounting.
+ */
+static void vio_dma_iommu_unmap_single(struct device *dev,
+				       dma_addr_t dma_handle, size_t size,
+				       enum dma_data_direction direction,
+				       struct dma_attrs *attrs)
+{
+	dma_iommu_ops.unmap_single(dev, dma_handle, size, direction, attrs);
+	vio_cmo_dealloc(to_vio_dev(dev), roundup(size, IOMMU_PAGE_SIZE));
+}
+
+/*
+ * CMO-aware scatter/gather mapping.
+ *
+ * The sum of all entry lengths, each rounded up to IOMMU_PAGE_SIZE, is
+ * charged against the device's entitlement before the list is mapped
+ * through dma_iommu_ops.  After a successful mapping, the difference
+ * between what was charged and what the mapped entries actually use
+ * (their dma_length, rounded up) is handed back.
+ *
+ * Returns the number of mapped entries, or 0 on failure.  Either failure
+ * bumps the device's allocs_failed counter.
+ */
+static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+				int nelems, enum dma_data_direction direction,
+				struct dma_attrs *attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct scatterlist *sgl;
+	int ret, count = 0;
+	size_t alloc_size = 0;
+
+	for (sgl = sglist; count < nelems; count++, sgl++)
+		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);
+
+	if (vio_cmo_alloc(viodev, alloc_size)) {
+		atomic_inc(&viodev->cmo.allocs_failed);
+		return 0;
+	}
+
+	ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
+
+	if (unlikely(!ret)) {
+		vio_cmo_dealloc(viodev, alloc_size);
+		atomic_inc(&viodev->cmo.allocs_failed);
+		/*
+		 * The whole charge has just been released; return now so
+		 * the trimming code below does not release it a second time.
+		 */
+		return ret;
+	}
+
+	/* Hand back whatever was charged but not used by the mapping */
+	for (sgl = sglist, count = 0; count < ret; count++, sgl++)
+		alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
+	if (alloc_size)
+		vio_cmo_dealloc(viodev, alloc_size);
+
+	return ret;
+}
+
+/*
+ * CMO-aware scatter/gather unmap: total the dma_length of each entry,
+ * rounded up to IOMMU_PAGE_SIZE, tear the mappings down through
+ * dma_iommu_ops and then return that total to the CMO accounting.
+ */
+static void vio_dma_iommu_unmap_sg(struct device *dev,
+				   struct scatterlist *sglist, int nelems,
+				   enum dma_data_direction direction,
+				   struct dma_attrs *attrs)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	struct scatterlist *entry = sglist;
+	size_t freed = 0;
+	int i = 0;
+
+	/* Sizes must be read before the entries are unmapped */
+	while (i < nelems) {
+		freed += roundup(entry->dma_length, IOMMU_PAGE_SIZE);
+		i++;
+		entry++;
+	}
+
+	dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
+
+	vio_cmo_dealloc(viodev, freed);
+}
+
+/*
+ * DMA operations for CMO-enabled VIO devices.  Each entry is a wrapper
+ * above that adds entitlement accounting around the corresponding
+ * dma_iommu_ops call.
+ */
+struct dma_mapping_ops vio_dma_mapping_ops = {
+ .alloc_coherent = vio_dma_iommu_alloc_coherent,
+ .free_coherent = vio_dma_iommu_free_coherent,
+ .map_single = vio_dma_iommu_map_single,
+ .unmap_single = vio_dma_iommu_unmap_single,
+ .map_sg = vio_dma_iommu_map_sg,
+ .unmap_sg = vio_dma_iommu_unmap_sg,
+};
+
+/**
+ * vio_cmo_set_dev_desired - Set desired entitlement for a device
+ *
+ * @viodev: struct vio_dev for device to alter
+ * @desired: new desired entitlement level in bytes
+ *
+ * For use by devices to request a change to their entitlement at runtime or
+ * through sysfs. The desired entitlement level is changed and a balancing
+ * of system resources is scheduled to run in the future.
+ *
+ * Does nothing when the firmware lacks FW_FEATURE_CMO or when @viodev is
+ * not on the CMO device list.  Values below VIO_CMO_MIN_ENT are raised to
+ * VIO_CMO_MIN_ENT.
+ */
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
+{
+	unsigned long flags;
+	struct vio_cmo_dev_entry *dev_ent;
+	int found = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_CMO))
+		return;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+	if (desired < VIO_CMO_MIN_ENT)
+		desired = VIO_CMO_MIN_ENT;
+
+	/*
+	 * Changes will not be made for devices not in the device list.
+	 * If it is not in the device list, then no driver is loaded
+	 * for the device and it can not receive entitlement.
+	 */
+	list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
+		if (viodev == dev_ent->viodev) {
+			found = 1;
+			break;
+		}
+	if (!found) {
+		/* Do not leave with the lock held and interrupts off */
+		spin_unlock_irqrestore(&vio_cmo.lock, flags);
+		return;
+	}
+
+	/* Increase/decrease in desired device entitlement */
+	if (desired >= viodev->cmo.desired) {
+		/* Just bump the bus and device values prior to a balance */
+		vio_cmo.desired += desired - viodev->cmo.desired;
+		viodev->cmo.desired = desired;
+	} else {
+		/* Decrease bus and device values for desired entitlement */
+		vio_cmo.desired -= viodev->cmo.desired - desired;
+		viodev->cmo.desired = desired;
+		/*
+		 * If less entitlement is desired than current entitlement, move
+		 * any reserve memory in the change region to the excess pool.
+		 */
+		if (viodev->cmo.entitled > desired) {
+			vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
+			vio_cmo.excess.size += viodev->cmo.entitled - desired;
+			/*
+			 * If entitlement moving from the reserve pool to the
+			 * excess pool is currently unused, add to the excess
+			 * free counter.
+			 */
+			if (viodev->cmo.allocated < viodev->cmo.entitled)
+				vio_cmo.excess.free += viodev->cmo.entitled -
+					max(viodev->cmo.allocated, desired);
+			viodev->cmo.entitled = desired;
+		}
+	}
+	schedule_delayed_work(&vio_cmo.balance_q, 0);
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/**
+ * vio_cmo_bus_probe - Handle CMO specific bus probe activities
+ *
+ * @viodev - Pointer to struct vio_dev for device
+ *
+ * Determine the devices IO memory entitlement needs, attempting
+ * to satisfy the system minimum entitlement at first and scheduling
+ * a balance operation to take care of the rest at a later time.
+ *
+ * A device with an "ibm,my-dma-window" property needs VIO_CMO_MIN_ENT
+ * bytes and is added to the CMO device list; a device without one needs
+ * nothing and is not tracked.  When the entitlement cannot be found, the
+ * list entry added here is removed and freed again before returning.
+ *
+ * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
+ *          -ENOMEM when entitlement is not available for device or
+ *          device entry.
+ *
+ */
+static int vio_cmo_bus_probe(struct vio_dev *viodev)
+{
+	struct vio_cmo_dev_entry *dev_ent = NULL;
+	struct device *dev = &viodev->dev;
+	struct vio_driver *viodrv = to_vio_driver(dev->driver);
+	unsigned long flags;
+	size_t size;
+
+	/*
+	 * Check to see that device has a DMA window and configure
+	 * entitlement for the device.
+	 */
+	if (of_get_property(viodev->dev.archdata.of_node,
+			    "ibm,my-dma-window", NULL)) {
+		/* Check that the driver is CMO enabled and get desired DMA */
+		if (!viodrv->get_desired_dma) {
+			dev_err(dev, "%s: device driver does not support CMO\n",
+				__func__);
+			return -EINVAL;
+		}
+
+		viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
+		if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
+			viodev->cmo.desired = VIO_CMO_MIN_ENT;
+		size = VIO_CMO_MIN_ENT;
+
+		dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
+				  GFP_KERNEL);
+		if (!dev_ent)
+			return -ENOMEM;
+
+		dev_ent->viodev = viodev;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+		list_add(&dev_ent->list, &vio_cmo.device_list);
+	} else {
+		viodev->cmo.desired = 0;
+		size = 0;
+		spin_lock_irqsave(&vio_cmo.lock, flags);
+	}
+
+	/*
+	 * If the needs for vio_cmo.min have not changed since they
+	 * were last set, the number of devices in the OF tree has
+	 * been constant and the IO memory for this is already in
+	 * the reserve pool.
+	 */
+	if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
+			    VIO_CMO_MIN_ENT)) {
+		/* Updated desired entitlement if device requires it */
+		if (size)
+			vio_cmo.desired += (viodev->cmo.desired -
+					    VIO_CMO_MIN_ENT);
+	} else {
+		size_t tmp;
+
+		/* tmp is everything that can be drawn on: spare + free excess */
+		tmp = vio_cmo.spare + vio_cmo.excess.free;
+		if (tmp < size) {
+			dev_err(dev, "%s: insufficient free "
+				"entitlement to add device. "
+				"Need %lu, have %lu\n", __func__,
+				size, tmp);
+			/*
+			 * Undo the list insertion done above so a failed
+			 * probe leaves no stale entry behind.  dev_ent is
+			 * NULL when the device was never tracked.
+			 */
+			if (dev_ent)
+				list_del(&dev_ent->list);
+			spin_unlock_irqrestore(&vio_cmo.lock, flags);
+			kfree(dev_ent);
+			return -ENOMEM;
+		}
+
+		/* Use excess pool first to fulfill request */
+		tmp = min(size, vio_cmo.excess.free);
+		vio_cmo.excess.free -= tmp;
+		vio_cmo.excess.size -= tmp;
+		vio_cmo.reserve.size += tmp;
+
+		/* Use spare if excess pool was insufficient */
+		vio_cmo.spare -= size - tmp;
+
+		/* Update bus accounting */
+		vio_cmo.min += size;
+		vio_cmo.desired += viodev->cmo.desired;
+	}
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+	return 0;
+}
+
+/**
+ * vio_cmo_bus_remove - Undo the CMO bus accounting of a device
+ *
+ * @viodev: struct vio_dev of the device being removed
+ *
+ * Takes the device off vio_cmo.device_list and gives back all of its
+ * entitlement above VIO_CMO_MIN_ENT: the spare is topped up to
+ * VIO_CMO_MIN_ENT first, whatever is left moves from the reserve pool
+ * to the excess pool. The device keeps VIO_CMO_MIN_ENT as both its
+ * entitled and desired value. Having IO memory still allocated to the
+ * device at this point is treated as a BUG().
+ */
+static void vio_cmo_bus_remove(struct vio_dev *viodev)
+{
+	struct vio_cmo_dev_entry *entry;
+	unsigned long flags;
+	size_t refill;
+
+	spin_lock_irqsave(&vio_cmo.lock, flags);
+
+	/* Nothing may still be mapped for this device */
+	if (viodev->cmo.allocated) {
+		dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
+			"allocated after remove operation.\n",
+			__func__, viodev->cmo.allocated);
+		BUG();
+	}
+
+	/* Unlink and free this device's entry on the CMO device list */
+	list_for_each_entry(entry, &vio_cmo.device_list, list) {
+		if (entry->viodev != viodev)
+			continue;
+		list_del(&entry->list);
+		kfree(entry);
+		break;
+	}
+
+	/* A device that holds no entitlement has nothing to hand back */
+	if (!viodev->cmo.entitled)
+		goto out_unlock;
+
+	/*
+	 * The device is still in the OF tree, so its minimum stays
+	 * counted in vio_cmo.min and vio_cmo.desired; only the part
+	 * above the minimum is dropped from the bus-wide desired value.
+	 */
+	vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
+
+	/* From here on cmo.entitled is the amount being returned */
+	viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
+
+	/* The spare gets first call on what is being returned */
+	if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
+		refill = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
+						    vio_cmo.spare));
+		vio_cmo.spare += refill;
+		viodev->cmo.entitled -= refill;
+	}
+
+	/* The rest moves from the reserve pool to the excess pool */
+	vio_cmo.reserve.size -= viodev->cmo.entitled;
+	vio_cmo.excess.size += viodev->cmo.entitled;
+	vio_cmo.excess.free += viodev->cmo.entitled;
+
+	/*
+	 * Leave the device with the minimum so that a later driver
+	 * reload finds its entitlement still in place.
+	 */
+	viodev->cmo.desired = VIO_CMO_MIN_ENT;
+	viodev->cmo.entitled = VIO_CMO_MIN_ENT;
+	atomic_set(&viodev->cmo.allocs_failed, 0);
+
+out_unlock:
+	spin_unlock_irqrestore(&vio_cmo.lock, flags);
+}
+
+/*
+ * Point the device at the VIO DMA mapping ops.
+ *
+ * The dma_supported callback of the shared vio_dma_mapping_ops table is
+ * (re)assigned from dma_iommu_ops on every call before the table is
+ * installed as the device's dma_ops.
+ */
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
+{
+ vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
+ viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
+}
+
+/**
+ * vio_cmo_bus_init - Set up bus-wide CMO entitlement accounting
+ *
+ * Resets vio_cmo, queries the system entitlement with h_get_mpp() and
+ * sizes the reserve pool as one spare VIO_CMO_MIN_ENT plus one
+ * VIO_CMO_MIN_ENT per device reported by vio_cmo_num_OF_devs(). The
+ * remainder of the entitlement becomes the excess pool. Panics when the
+ * reserve pool does not fit into the system entitlement.
+ */
+static void vio_cmo_bus_init(void)
+{
+	struct hvcall_mpp_data mpp_data;
+	int err;
+
+	/* Start from a clean slate before wiring up lock, list and work */
+	memset(&vio_cmo, 0, sizeof(struct vio_cmo));
+	spin_lock_init(&vio_cmo.lock);
+	INIT_LIST_HEAD(&vio_cmo.device_list);
+	INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
+
+	/*
+	 * Ask for the current system entitlement. A failure is only
+	 * logged here: with entitlement left at 0 the reservation check
+	 * below is what ends up calling panic().
+	 */
+	err = h_get_mpp(&mpp_data);
+	if (err == H_SUCCESS) {
+		vio_cmo.entitled = mpp_data.entitled_mem;
+	} else {
+		printk(KERN_ERR "%s: unable to determine system IO "
+		       "entitlement. (%d)\n", __func__, err);
+		vio_cmo.entitled = 0;
+	}
+
+	/* Reserve the spare plus the minimum for every OF device */
+	vio_cmo.spare = VIO_CMO_MIN_ENT;
+	vio_cmo.reserve.size = vio_cmo.spare;
+	vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
+				 VIO_CMO_MIN_ENT);
+
+	/* The reservation has to fit into what the system granted */
+	if (vio_cmo.reserve.size > vio_cmo.entitled) {
+		printk(KERN_ERR "%s: insufficient system entitlement\n",
+		       __func__);
+		panic("%s: Insufficient system entitlement", __func__);
+	}
+
+	/* Bus minimum and desired both start out at the reserve size */
+	vio_cmo.min = vio_cmo.reserve.size;
+	vio_cmo.desired = vio_cmo.reserve.size;
+
+	/* Whatever is not reserved forms the (entirely free) excess pool */
+	vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
+	vio_cmo.excess.free = vio_cmo.excess.size;
+}
+
+/* sysfs device functions and data structures for CMO */
+
+/*
+ * Generate viodev_cmo_<name>_show(), a sysfs show callback that prints
+ * the cmo.<name> field of the vio_dev behind @dev with "%lu\n".
+ */
+#define viodev_cmo_rd_attr(name) \
+static ssize_t viodev_cmo_##name##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
+}
+
+/* sysfs show: print the device's failed-allocation counter as "%d\n". */
+static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n",
+		       atomic_read(&to_vio_dev(dev)->cmo.allocs_failed));
+}
+
+/*
+ * sysfs store: any write clears the device's failed-allocation counter.
+ * The written data is ignored and the whole write is reported consumed.
+ */
+static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	atomic_set(&to_vio_dev(dev)->cmo.allocs_failed, 0);
+
+	return count;
+}
+
+/*
+ * sysfs store: parse a decimal byte count from @buf and pass it to
+ * vio_cmo_set_dev_desired() as the device's new desired entitlement.
+ *
+ * Returns @count on success or the error from strict_strtoul() when the
+ * input is not a valid number.
+ */
+static ssize_t viodev_cmo_desired_set(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct vio_dev *viodev = to_vio_dev(dev);
+	/* strict_strtoul() stores through an unsigned long *, not a size_t * */
+	unsigned long new_desired;
+	int ret;
+
+	ret = strict_strtoul(buf, 10, &new_desired);
+	if (ret)
+		return ret;
+
+	vio_cmo_set_dev_desired(viodev, new_desired);
+	return count;
+}
+
+/* Instantiate the show callbacks for cmo.desired, .entitled, .allocated */
+viodev_cmo_rd_attr(desired);
+viodev_cmo_rd_attr(entitled);
+viodev_cmo_rd_attr(allocated);
+
+/* Forward declarations; the definitions are outside this section */
+static ssize_t name_show(struct device *, struct device_attribute *, char *);
+static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
+/*
+ * Per-device sysfs attributes used when CMO is active (installed by
+ * vio_cmo_sysfs_init()). cmo_desired and cmo_allocs_failed are readable
+ * by everyone and writable by owner and group; cmo_entitled and
+ * cmo_allocated use S_IRUGO and have no store callback.
+ */
+static struct device_attribute vio_cmo_dev_attrs[] = {
+ __ATTR_RO(name),
+ __ATTR_RO(devspec),
+ __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viodev_cmo_desired_show, viodev_cmo_desired_set),
+ __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
+ __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
+ __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
+ __ATTR_NULL
+};
+
+/* sysfs bus functions and data structures for CMO */
+
+/*
+ * Generate viobus_cmo_<name>_show(), a bus attribute show callback that
+ * prints vio_cmo.<name> with "%lu\n". The value is read without taking
+ * vio_cmo.lock.
+ */
+#define viobus_cmo_rd_attr(name) \
+static ssize_t \
+viobus_cmo_##name##_show(struct bus_type *bt, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", vio_cmo.name); \
+}
+
+/*
+ * Generate viobus_cmo_<name>_pool_show_<var>(), a bus attribute show
+ * callback that prints member <var> of pool vio_cmo.<name> with "%lu\n".
+ */
+#define viobus_cmo_pool_rd_attr(name, var) \
+static ssize_t \
+viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf) \
+{ \
+ return sprintf(buf, "%lu\n", vio_cmo.name.var); \
+}
+
+/*
+ * Bus attribute store: any write resets vio_cmo.high to the current
+ * value of vio_cmo.curr. The written data itself is ignored and the
+ * whole write is reported as consumed.
+ */
+static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
+				     size_t count)
+{
+	unsigned long irqflags;
+
+	/* Copy curr to high atomically with respect to other lock holders */
+	spin_lock_irqsave(&vio_cmo.lock, irqflags);
+	vio_cmo.high = vio_cmo.curr;
+	spin_unlock_irqrestore(&vio_cmo.lock, irqflags);
+
+	return count;
+}
+
+/* Instantiate the show callbacks referenced by vio_cmo_bus_attrs below */
+viobus_cmo_rd_attr(entitled);
+viobus_cmo_pool_rd_attr(reserve, size);
+viobus_cmo_pool_rd_attr(excess, size);
+viobus_cmo_pool_rd_attr(excess, free);
+viobus_cmo_rd_attr(spare);
+viobus_cmo_rd_attr(min);
+viobus_cmo_rd_attr(desired);
+viobus_cmo_rd_attr(curr);
+viobus_cmo_rd_attr(high);
+
+/*
+ * Bus-level sysfs attributes exposing the vio_cmo accounting values.
+ * All use S_IRUGO with no store callback except cmo_high, which owner
+ * and group may write to reset it via viobus_cmo_high_reset().
+ */
+static struct bus_attribute vio_cmo_bus_attrs[] = {
+ __ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
+ __ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
+ __ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
+ __ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
+ __ATTR(cmo_spare, S_IRUGO, viobus_cmo_spare_show, NULL),
+ __ATTR(cmo_min, S_IRUGO, viobus_cmo_min_show, NULL),
+ __ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
+ __ATTR(cmo_curr, S_IRUGO, viobus_cmo_curr_show, NULL),
+ __ATTR(cmo_high, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
+ viobus_cmo_high_show, viobus_cmo_high_reset),
+ __ATTR_NULL
+};
+
+/*
+ * Install the CMO bus and device attribute tables on vio_bus_type.
+ */
+static void vio_cmo_sysfs_init(void)
+{
+	vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
+	vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
+}
+#else /* CONFIG_PPC_SMLPAR */
+/*
+ * Dummy functions for iSeries platform
+ *
+ * No-op replacements used when CONFIG_PPC_SMLPAR is not set. The ones
+ * returning int report success (0). Parameterless stubs are declared
+ * with (void) so they are real prototypes, matching the definitions in
+ * the CONFIG_PPC_SMLPAR branch.
+ */
+int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
+void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
+static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
+static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
+static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
+static void vio_cmo_bus_init(void) {}
+static void vio_cmo_sysfs_init(void) {}
+#endif /* CONFIG_PPC_SMLPAR */
+EXPORT_SYMBOL(vio_cmo_entitlement_update);
+EXPORT_SYMBOL(vio_cmo_set_dev_desired);
+
static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
{
const unsigned char *dma_window;
@@ -114,8 +1105,17 @@ static int vio_bus_probe(struct device *dev)
return error;
id = vio_match_device(viodrv->id_table, viodev);
- if (id)
+ if (id) {
+ memset(&viodev->cmo, 0, sizeof(viodev->cmo));
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ error = vio_cmo_bus_probe(viodev);
+ if (error)
+ return error;
+ }
error = viodrv->probe(viodev, id);
+ if (error)
+ vio_cmo_bus_remove(viodev);
+ }
return error;
}
@@ -125,12 +1125,23 @@ static int vio_bus_remove(struct device *dev)
{
struct vio_dev *viodev = to_vio_dev(dev);
struct vio_driver *viodrv = to_vio_driver(dev->driver);
+ struct device *devptr;
+ int ret = 1;
+
+ /*
+ * Hold a reference to the device after the remove function is called
+ * to allow for CMO accounting cleanup for the device.
+ */
+ devptr = get_device(dev);
if (viodrv->remove)
- return viodrv->remove(viodev);
+ ret = viodrv->remove(viodev);
+
+ if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_bus_remove(viodev);
- /* driver can't remove */
- return 1;
+ put_device(devptr);
+ return ret;
}
/**
@@ -215,7 +1226,11 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node)
viodev->unit_address = *unit_address;
}
viodev->dev.archdata.of_node = of_node_get(of_node);
- viodev->dev.archdata.dma_ops = &dma_iommu_ops;
+
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_set_dma_ops(viodev);
+ else
+ viodev->dev.archdata.dma_ops = &dma_iommu_ops;
viodev->dev.archdata.dma_data = vio_build_iommu_table(viodev);
viodev->dev.archdata.numa_node = of_node_to_nid(of_node);
@@ -245,6 +1260,9 @@ static int __init vio_bus_init(void)
int err;
struct device_node *node_vroot;
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_sysfs_init();
+
err = bus_register(&vio_bus_type);
if (err) {
printk(KERN_ERR "failed to register VIO bus\n");
@@ -262,6 +1280,9 @@ static int __init vio_bus_init(void)
return err;
}
+ if (firmware_has_feature(FW_FEATURE_CMO))
+ vio_cmo_bus_init();
+
node_vroot = of_find_node_by_name(NULL, "vdevice");
if (node_vroot) {
struct device_node *of_node;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 0c3000bf8d75..4a8ce62fe112 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -9,6 +9,25 @@
ENTRY(_stext)
+/* Program headers that output sections below are assigned to with
+ ":kernel", ":notes" and ":dummy". */
+PHDRS {
+ kernel PT_LOAD FLAGS(7); /* RWX */
+ notes PT_NOTE FLAGS(0);
+ dummy PT_NOTE FLAGS(0);
+
+ /* binutils < 2.18 has a bug that makes it misbehave when taking an
+ ELF file with all segments at load address 0 as input. This
+ happens when running "strip" on vmlinux, because of the AT() magic
+ in this linker script. People using GCC >= 4.2 won't run into
+ this problem, because the "build-id" support will put some data
+ into the "notes" segment (at a non-zero load address).
+
+ To work around this, we force some data into both the "dummy"
+ segment and the kernel segment, so the dummy segment will get a
+ non-zero load address. It's not enough to always create the
+ "notes" segment, since if nothing gets assigned to it, its load
+ address will be zero. */
+}
+
#ifdef CONFIG_PPC64
OUTPUT_ARCH(powerpc:common64)
jiffies = jiffies_64;
@@ -35,7 +54,7 @@ SECTIONS
ALIGN_FUNCTION();
*(.text.head)
_text = .;
- *(.text .fixup .text.init.refok .exit.text.refok)
+ *(.text .fixup .text.init.refok .exit.text.refok __ftr_alt_*)
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
@@ -50,7 +69,7 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
_etext = .;
PROVIDE32 (etext = .);
- }
+ } :kernel
/* Read-only data */
RODATA
@@ -62,9 +81,13 @@ SECTIONS
__stop___ex_table = .;
}
- NOTES
+ NOTES :kernel :notes
- BUG_TABLE
+ /* The dummy segment contents for the bug workaround mentioned above
+ near PHDRS. */
+ .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
+ LONG(0xf177)
+ } :kernel :dummy
/*
* Init sections discarded at runtime
@@ -76,7 +99,7 @@ SECTIONS
_sinittext = .;
INIT_TEXT
_einittext = .;
- }
+ } :kernel
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
@@ -127,6 +150,12 @@ SECTIONS
*(__ftr_fixup)
__stop___ftr_fixup = .;
}
+ . = ALIGN(8);
+ __lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
+ __start___lwsync_fixup = .;
+ *(__lwsync_fixup)
+ __stop___lwsync_fixup = .;
+ }
#ifdef CONFIG_PPC64
. = ALIGN(8);
__fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {